| Index: src/IceTargetLoweringX8632.cpp
|
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
|
| index 2b14a65c96a243bf997cad303ca7e05f4ddb7881..af9ebc5a1dc79f02a42d83abc454ec12fb9ad874 100644
|
| --- a/src/IceTargetLoweringX8632.cpp
|
| +++ b/src/IceTargetLoweringX8632.cpp
|
| @@ -85,6 +85,27 @@ InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
|
| return TableIcmp32[Index].Mapping;
|
| }
|
|
|
| +const struct TableTypeX8632Attributes_ {
|
| + Type InVectorElementType; // Element type in the X86 vector representation
|
| +} TableTypeX8632Attributes[] = {
|
| +#define X(tag, elementty, cvt, sdss, pack, width) \
|
| + { elementty } \
|
| + ,
|
| + ICETYPEX8632_TABLE
|
| +#undef X
|
| + };
|
| +const size_t TableTypeX8632AttributesSize =
|
| + llvm::array_lengthof(TableTypeX8632Attributes);
|
| +
|
| +// Return the type which the elements of the vector have in the X86
|
| +// representation of the vector. Ty must be a vector type (asserted).
|
| +Type getInVectorElementType(Type Ty) {
|
| + assert(isVectorType(Ty));
|
| + size_t Index = static_cast<size_t>(Ty);
|
| + assert(Index < TableTypeX8632AttributesSize);
|
| + return TableTypeX8632Attributes[Index].InVectorElementType;
|
| +}
|
| +
|
| // The maximum number of arguments to pass in XMM registers
|
| const unsigned X86_MAX_XMM_ARGS = 4;
|
| // The number of bits in a byte
|
| @@ -173,7 +194,7 @@ void xMacroIntegrityCheck() {
|
| // Define a temporary set of enum values based on low-level
|
| // table entries.
|
| enum _tmp_enum {
|
| -#define X(tag, cvt, sdss, pack, width) _tmp_##tag,
|
| +#define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
|
| ICETYPEX8632_TABLE
|
| #undef X
|
| _num
|
| @@ -185,7 +206,7 @@ void xMacroIntegrityCheck() {
|
| #undef X
|
| // Define a set of constants based on low-level table entries,
|
| // and ensure the table entry keys are consistent.
|
| -#define X(tag, cvt, sdss, pack, width) \
|
| +#define X(tag, elementty, cvt, sdss, pack, width) \
|
| static const int _table2_##tag = _tmp_##tag; \
|
| STATIC_ASSERT(_table1_##tag == _table2_##tag);
|
| ICETYPEX8632_TABLE;
|
| @@ -2107,6 +2128,85 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
|
| }
|
| }
|
|
|
| +void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
|
| + Operand *SourceVectOperand = Inst->getSrc(0);
|
| + ConstantInteger *ElementIndex =
|
| + llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
|
| + // Only constant indices are allowed in PNaCl IR, so the cast must succeed.
|
| + assert(ElementIndex);
|
| +
|
| + unsigned Index = ElementIndex->getValue();
|
| + Type Ty = SourceVectOperand->getType();
|
| + Type ElementTy = typeElementType(Ty);
|
| + Type InVectorElementTy = getInVectorElementType(Ty);
|
| + Variable *ExtractedElement = makeReg(InVectorElementTy);
|
| +
|
| + // TODO(wala): Determine the best lowering sequences for each type.
|
| + if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
|
| + // 32-bit wide elements: shuffle the wanted element to the lowest index
|
| + // with pshufd, then copy it out with movd (i32) or movss (f32).
|
| + // TODO(wala): SSE4.1 has extractps and pextrd.
|
| + //
|
| + // ALIGNHACK: Force vector operands to registers in instructions that
|
| + // require aligned memory operands until support for stack alignment
|
| + // is implemented.
|
| +#define ALIGN_HACK(Vect) legalizeToVar((Vect))
|
| + Operand *T = NULL;
|
| + if (Index) {
|
| + // The shuffle only needs to occur if the element to be extracted
|
| + // is not at the lowest index.
|
| + Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
|
| + T = makeReg(Ty);
|
| + _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
|
| + } else {
|
| + // TODO(wala): If SourceVectOperand is in memory, express it as
|
| + // mem32 so that the call to legalizeToVar() is made unnecessary.
|
| + // _movd and _movss only take mem32 memory operands.
|
| + T = legalizeToVar(SourceVectOperand);
|
| + }
|
| +
|
| + if (InVectorElementTy == IceType_i32) {
|
| + _movd(ExtractedElement, T);
|
| + } else { // InVectorElementTy == IceType_f32
|
| + // TODO: It should be possible to use _mov here instead of _movss.
|
| + _movss(ExtractedElement, T);
|
| + }
|
| +#undef ALIGN_HACK
|
| + } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
|
| + Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
|
| + _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
|
| + } else {
|
| + assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
|
| + // Spill the value to a stack slot and do the extraction in memory.
|
| + // TODO(wala): SSE4.1 has pextrb.
|
| + //
|
| + // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
|
| + // support for legalizing to mem is implemented.
|
| + Variable *Slot = Func->makeVariable(Ty, Context.getNode());
|
| + Slot->setWeight(RegWeight::Zero);
|
| + _movp(Slot, legalizeToVar(SourceVectOperand));
|
| +
|
| + // Compute the memory location of the element within the spilled vector.
|
| + unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
|
| + OperandX8632Mem *Loc =
|
| + getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
|
| + _mov(ExtractedElement, Loc);
|
| + }
|
| +
|
| + if (ElementTy == IceType_i1) {
|
| + // The element was extracted at its in-vector width; truncate it to i1.
|
| + Variable *T = makeReg(IceType_i1);
|
| + InstCast *Cast =
|
| + InstCast::create(Func, InstCast::Trunc, T, ExtractedElement);
|
| + lowerCast(Cast);
|
| + ExtractedElement = T;
|
| + }
|
| +
|
| + // Copy the element to the destination.
|
| + Variable *Dest = Inst->getDest();
|
| + _mov(Dest, ExtractedElement);
|
| +}
|
| +
|
| void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
|
| Operand *Src0 = Inst->getSrc(0);
|
| Operand *Src1 = Inst->getSrc(1);
|
| @@ -2238,6 +2338,123 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
|
| Context.insert(Label);
|
| }
|
|
|
| +void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
|
| + Operand *SourceVectOperand = Inst->getSrc(0);
|
| + Operand *ElementToInsert = Inst->getSrc(1);
|
| + ConstantInteger *ElementIndex =
|
| + llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
|
| + // Only constant indices are allowed in PNaCl IR, so the cast must succeed.
|
| + assert(ElementIndex);
|
| + unsigned Index = ElementIndex->getValue();
|
| +
|
| + Type Ty = SourceVectOperand->getType();
|
| + Type ElementTy = typeElementType(Ty);
|
| + Type InVectorElementTy = getInVectorElementType(Ty);
|
| +
|
| + if (ElementTy == IceType_i1) {
|
| + // Expand the element to the appropriate size for it to be inserted
|
| + // in the vector.
|
| + Variable *Expanded =
|
| + Func->makeVariable(InVectorElementTy, Context.getNode());
|
| + InstCast *Cast =
|
| + InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
|
| + lowerCast(Cast);
|
| + ElementToInsert = Expanded;
|
| + }
|
| +
|
| + if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
|
| + // Lower insertelement with 32-bit wide elements using shufps.
|
| + // TODO(wala): SSE4.1 has pinsrd and insertps.
|
| + Variable *Element = NULL;
|
| + if (InVectorElementTy == IceType_f32) {
|
| + // Element will be in an XMM register since it is floating point.
|
| + Element = legalizeToVar(ElementToInsert);
|
| + } else {
|
| + // Copy an integer to an XMM register.
|
| + Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
|
| + Element = makeReg(Ty);
|
| + _movd(Element, T);
|
| + }
|
| +
|
| + // shufps treats the source and destination operands as vectors of
|
| + // four doublewords. The destination's two high doublewords are
|
| + // selected from the source operand and the two low doublewords are
|
| + // selected from the (original value of the) destination operand.
|
| + // An insertelement operation can be effected with a sequence of two
|
| + // shufps operations with appropriate masks. In all cases below,
|
| + // Element[0] is being inserted into SourceVectOperand. Indices are
|
| + // ordered from left to right.
|
| + //
|
| + // insertelement into index 0 (result is stored in Element):
|
| + // Element := Element[0, 0] SourceVectOperand[0, 1]
|
| + // Element := Element[0, 3] SourceVectOperand[2, 3]
|
| + //
|
| + // insertelement into index 1 (result is stored in Element):
|
| + // Element := Element[0, 0] SourceVectOperand[0, 0]
|
| + // Element := Element[3, 0] SourceVectOperand[2, 3]
|
| + //
|
| + // insertelement into index 2 (result is stored in T):
|
| + // T := SourceVectOperand
|
| + // Element := Element[0, 0] T[0, 3]
|
| + // T := T[0, 1] Element[0, 3]
|
| + //
|
| + // insertelement into index 3 (result is stored in T):
|
| + // T := SourceVectOperand
|
| + // Element := Element[0, 0] T[0, 2]
|
| + // T := T[0, 1] Element[3, 0]
|
| + const unsigned char Mask1[4] = {64, 0, 192, 128}; // 1st shufps imm8
|
| + const unsigned char Mask2[4] = {236, 227, 196, 52}; // 2nd shufps imm8
|
| +
|
| + Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
|
| + Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);
|
| +
|
| + // ALIGNHACK: Force vector operands to registers in instructions that
|
| + // require aligned memory operands until support for stack alignment
|
| + // is implemented.
|
| +#define ALIGN_HACK(Vect) legalizeToVar((Vect))
|
| + if (Index < 2) {
|
| + SourceVectOperand = ALIGN_HACK(SourceVectOperand);
|
| + _shufps(Element, SourceVectOperand, Mask1Constant);
|
| + _shufps(Element, SourceVectOperand, Mask2Constant);
|
| + _movp(Inst->getDest(), Element);
|
| + } else {
|
| + Variable *T = makeReg(Ty);
|
| + _movp(T, SourceVectOperand);
|
| + _shufps(Element, T, Mask1Constant);
|
| + _shufps(T, Element, Mask2Constant);
|
| + _movp(Inst->getDest(), T);
|
| + }
|
| +#undef ALIGN_HACK
|
| + } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
|
| + Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
|
| + Variable *T = makeReg(Ty);
|
| + _movp(T, SourceVectOperand);
|
| + _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
|
| + _movp(Inst->getDest(), T);
|
| + } else {
|
| + assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
|
| + // Spill the value to a stack slot and perform the insertion in
|
| + // memory.
|
| + // TODO(wala): SSE4.1 has pinsrb.
|
| + //
|
| + // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
|
| + // support for legalizing to mem is implemented.
|
| + Variable *Slot = Func->makeVariable(Ty, Context.getNode());
|
| + Slot->setWeight(RegWeight::Zero);
|
| + _movp(Slot, legalizeToVar(SourceVectOperand));
|
| +
|
| + // Compute the memory location of the element being replaced.
|
| + unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
|
| + OperandX8632Mem *Loc =
|
| + getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
|
| + _store(legalizeToVar(ElementToInsert), Loc);
|
| +
|
| + Variable *T = makeReg(Ty);
|
| + _movp(T, Slot);
|
| + _movp(Inst->getDest(), T);
|
| + }
|
| +}
|
| +
|
| void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| switch (Instr->getIntrinsicInfo().ID) {
|
| case Intrinsics::AtomicCmpxchg: {
|
| @@ -3169,6 +3386,23 @@ Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
|
| return Dest;
|
| }
|
|
|
| +OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
|
| + Variable *Slot,
|
| + uint32_t Offset) {
|
| + // Ensure that Slot is a stack slot (zero weight, no register assigned).
|
| + assert(Slot->getWeight() == RegWeight::Zero);
|
| + assert(Slot->getRegNum() == Variable::NoRegister);
|
| + // Compute the address of Slot in memory and hold it in Loc.
|
| + // TODO(wala,stichnot): lea should not be required. The address of
|
| + // the stack slot is known at compile time (although not until after
|
| + // addProlog()).
|
| + const Type PointerType = IceType_i32;
|
| + Variable *Loc = makeReg(PointerType);
|
| + _lea(Loc, Slot);
|
| + Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
|
| + return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
|
| +}
|
| +
|
| // Helper for legalize() to emit the right code to lower an operand to a
|
| // register of the appropriate type.
|
| Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
|
|
|