OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 2147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2158 // TODO(wala): Determine the best lowering sequences for each type. | 2158 // TODO(wala): Determine the best lowering sequences for each type. |
2159 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2159 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2160 // Lower extractelement operations where the element is 32 bits | 2160 // Lower extractelement operations where the element is 32 bits |
2161 // wide with pshufd. | 2161 // wide with pshufd. |
2162 // TODO(wala): SSE4.1 has extractps and pextrd | 2162 // TODO(wala): SSE4.1 has extractps and pextrd |
2163 // | 2163 // |
2164 // ALIGNHACK: Force vector operands to registers in instructions that | 2164 // ALIGNHACK: Force vector operands to registers in instructions that |
2165 // require aligned memory operands until support for stack alignment | 2165 // require aligned memory operands until support for stack alignment |
2166 // is implemented. | 2166 // is implemented. |
2167 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2167 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
2168 Operand *T = NULL; | 2168 Variable *T = NULL; |
2169 if (Index) { | 2169 if (Index) { |
2170 // The shuffle only needs to occur if the element to be extracted | 2170 // The shuffle only needs to occur if the element to be extracted |
2171 // is not at the lowest index. | 2171 // is not at the lowest index. |
2172 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2172 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
2173 T = makeReg(Ty); | 2173 T = makeReg(Ty); |
2174 _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask); | 2174 _pshufd(T, ALIGN_HACK(SourceVectOperand), Mask); |
2175 } else { | 2175 } else { |
2176 // TODO(wala): If SourceVectOperand is in memory, express it as | |
2177 // mem32 so that the call to legalizeToVar() is made unnecessary. | |
2178 // _movd and _movss only take mem32 memory operands. | |
2179 T = legalizeToVar(SourceVectOperand); | 2176 T = legalizeToVar(SourceVectOperand); |
2180 } | 2177 } |
2181 | 2178 |
2182 if (InVectorElementTy == IceType_i32) { | 2179 if (InVectorElementTy == IceType_i32) { |
2183 _movd(ExtractedElement, T); | 2180 _movd(ExtractedElement, T); |
2184 } else { // InVectorElementTy == IceType_f32 | 2181 } else { // InVectorElementTy == IceType_f32 |
2185 // TODO: _mov should be able to be used here. | 2182 // TODO(wala): _movss is only used here because _mov does not |
| 2183 // allow a vector source and a scalar destination. _mov should be |
| 2184 // able to be used here. |
| 2185 // _movss is a binary instruction, so the FakeDef is needed to |
| 2186 // keep the live range analysis consistent. |
| 2187 Context.insert(InstFakeDef::create(Func, ExtractedElement)); |
2186 _movss(ExtractedElement, T); | 2188 _movss(ExtractedElement, T); |
2187 } | 2189 } |
2188 #undef ALIGN_HACK | 2190 #undef ALIGN_HACK |
2189 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { | 2191 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { |
2190 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2192 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
2191 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask); | 2193 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask); |
2192 } else { | 2194 } else { |
2193 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2195 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
2194 // Spill the value to a stack slot and do the extraction in memory. | 2196 // Spill the value to a stack slot and do the extraction in memory. |
2195 // TODO(wala): SSE4.1 has pextrb. | 2197 // TODO(wala): SSE4.1 has pextrb. |
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2514 } | 2516 } |
2515 | 2517 |
2516 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | 2518 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
2517 Operand *SourceVectOperand = Inst->getSrc(0); | 2519 Operand *SourceVectOperand = Inst->getSrc(0); |
2518 Operand *ElementToInsert = Inst->getSrc(1); | 2520 Operand *ElementToInsert = Inst->getSrc(1); |
2519 ConstantInteger *ElementIndex = | 2521 ConstantInteger *ElementIndex = |
2520 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); | 2522 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); |
2521 // Only constant indices are allowed in PNaCl IR. | 2523 // Only constant indices are allowed in PNaCl IR. |
2522 assert(ElementIndex); | 2524 assert(ElementIndex); |
2523 unsigned Index = ElementIndex->getValue(); | 2525 unsigned Index = ElementIndex->getValue(); |
| 2526 assert(Index < typeNumElements(SourceVectOperand->getType())); |
2524 | 2527 |
2525 Type Ty = SourceVectOperand->getType(); | 2528 Type Ty = SourceVectOperand->getType(); |
2526 Type ElementTy = typeElementType(Ty); | 2529 Type ElementTy = typeElementType(Ty); |
2527 Type InVectorElementTy = getInVectorElementType(Ty); | 2530 Type InVectorElementTy = getInVectorElementType(Ty); |
2528 | 2531 |
2529 if (ElementTy == IceType_i1) { | 2532 if (ElementTy == IceType_i1) { |
2530 // Expand the element to the appropriate size for it to be inserted | 2533 // Expand the element to the appropriate size for it to be inserted |
2531 // in the vector. | 2534 // in the vector. |
2532 Variable *Expanded = | 2535 Variable *Expanded = |
2533 Func->makeVariable(InVectorElementTy, Context.getNode()); | 2536 Func->makeVariable(InVectorElementTy, Context.getNode()); |
2534 InstCast *Cast = | 2537 InstCast *Cast = |
2535 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert); | 2538 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert); |
2536 lowerCast(Cast); | 2539 lowerCast(Cast); |
2537 ElementToInsert = Expanded; | 2540 ElementToInsert = Expanded; |
2538 } | 2541 } |
2539 | 2542 |
2540 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2543 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2541 // Lower insertelement with 32-bit wide elements using shufps. | 2544 // Lower insertelement with 32-bit wide elements using shufps or |
| 2545 // movss. |
2542 // TODO(wala): SSE4.1 has pinsrd and insertps. | 2546 // TODO(wala): SSE4.1 has pinsrd and insertps. |
2543 Variable *Element = NULL; | 2547 Variable *Element = NULL; |
2544 if (InVectorElementTy == IceType_f32) { | 2548 if (InVectorElementTy == IceType_f32) { |
2545 // Element will be in an XMM register since it is floating point. | 2549 // Element will be in an XMM register since it is floating point. |
2546 Element = legalizeToVar(ElementToInsert); | 2550 Element = legalizeToVar(ElementToInsert); |
2547 } else { | 2551 } else { |
2548 // Copy an integer to an XMM register. | 2552 // Copy an integer to an XMM register. |
2549 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem); | 2553 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem); |
2550 Element = makeReg(Ty); | 2554 Element = makeReg(Ty); |
2551 _movd(Element, T); | 2555 _movd(Element, T); |
2552 } | 2556 } |
2553 | 2557 |
| 2558 if (Index == 0) { |
| 2559 Variable *T = makeReg(Ty); |
| 2560 _movp(T, SourceVectOperand); |
| 2561 _movss(T, Element); |
| 2562 _movp(Inst->getDest(), T); |
| 2563 return; |
| 2564 } |
| 2565 |
2554 // shufps treats the source and destination operands as vectors of | 2566 // shufps treats the source and destination operands as vectors of |
2555 // four doublewords. The destination's two high doublewords are | 2567 // four doublewords. The destination's two high doublewords are |
2556 // selected from the source operand and the two low doublewords are | 2568 // selected from the source operand and the two low doublewords are |
2557 // selected from the (original value of) the destination operand. | 2569 // selected from the (original value of) the destination operand. |
2558 // An insertelement operation can be effected with a sequence of two | 2570 // An insertelement operation can be effected with a sequence of two |
2559 // shufps operations with appropriate masks. In all cases below, | 2571 // shufps operations with appropriate masks. In all cases below, |
2560 // Element[0] is being inserted into SourceVectOperand. Indices are | 2572 // Element[0] is being inserted into SourceVectOperand. Indices are |
2561 // ordered from left to right. | 2573 // ordered from left to right. |
2562 // | 2574 // |
2563 // insertelement into index 0 (result is stored in Element): | |
2564 // Element := Element[0, 0] SourceVectOperand[0, 1] | |
2565 // Element := Element[0, 3] SourceVectOperand[2, 3] | |
2566 // | |
2567 // insertelement into index 1 (result is stored in Element): | 2575 // insertelement into index 1 (result is stored in Element): |
2568 // Element := Element[0, 0] SourceVectOperand[0, 0] | 2576 // Element := Element[0, 0] SourceVectOperand[0, 0] |
2569 // Element := Element[3, 0] SourceVectOperand[2, 3] | 2577 // Element := Element[3, 0] SourceVectOperand[2, 3] |
2570 // | 2578 // |
2571 // insertelement into index 2 (result is stored in T): | 2579 // insertelement into index 2 (result is stored in T): |
2572 // T := SourceVectOperand | 2580 // T := SourceVectOperand |
2573 // Element := Element[0, 0] T[0, 3] | 2581 // Element := Element[0, 0] T[0, 3] |
2574 // T := T[0, 1] Element[0, 3] | 2582 // T := T[0, 1] Element[0, 3] |
2575 // | 2583 // |
2576 // insertelement into index 3 (result is stored in T): | 2584 // insertelement into index 3 (result is stored in T): |
2577 // T := SourceVectOperand | 2585 // T := SourceVectOperand |
2578 // Element := Element[0, 0] T[0, 2] | 2586 // Element := Element[0, 0] T[0, 2] |
2579 // T := T[0, 1] Element[3, 0] | 2587 // T := T[0, 1] Element[3, 0] |
2580 const unsigned char Mask1[4] = {64, 0, 192, 128}; | 2588 const unsigned char Mask1[3] = {0, 192, 128}; |
2581 const unsigned char Mask2[4] = {236, 227, 196, 52}; | 2589 const unsigned char Mask2[3] = {227, 196, 52}; |
2582 | 2590 |
2583 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]); | 2591 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
2584 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]); | 2592 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
2585 | 2593 |
2586 // ALIGNHACK: Force vector operands to registers in instructions that | 2594 // ALIGNHACK: Force vector operands to registers in instructions that |
2587 // require aligned memory operands until support for stack alignment | 2595 // require aligned memory operands until support for stack alignment |
2588 // is implemented. | 2596 // is implemented. |
2589 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2597 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
2590 if (Index < 2) { | 2598 if (Index == 1) { |
2591 SourceVectOperand = ALIGN_HACK(SourceVectOperand); | 2599 SourceVectOperand = ALIGN_HACK(SourceVectOperand); |
2592 _shufps(Element, SourceVectOperand, Mask1Constant); | 2600 _shufps(Element, SourceVectOperand, Mask1Constant); |
2593 _shufps(Element, SourceVectOperand, Mask2Constant); | 2601 _shufps(Element, SourceVectOperand, Mask2Constant); |
2594 _movp(Inst->getDest(), Element); | 2602 _movp(Inst->getDest(), Element); |
2595 } else { | 2603 } else { |
2596 Variable *T = makeReg(Ty); | 2604 Variable *T = makeReg(Ty); |
2597 _movp(T, SourceVectOperand); | 2605 _movp(T, SourceVectOperand); |
2598 _shufps(Element, T, Mask1Constant); | 2606 _shufps(Element, T, Mask1Constant); |
2599 _shufps(T, Element, Mask2Constant); | 2607 _shufps(T, Element, Mask2Constant); |
2600 _movp(Inst->getDest(), T); | 2608 _movp(Inst->getDest(), T); |
(...skipping 1409 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4010 for (SizeT i = 0; i < Size; ++i) { | 4018 for (SizeT i = 0; i < Size; ++i) { |
4011 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4019 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4012 } | 4020 } |
4013 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4021 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4014 } | 4022 } |
4015 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4023 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
4016 << "\n"; | 4024 << "\n"; |
4017 } | 4025 } |
4018 | 4026 |
4019 } // end of namespace Ice | 4027 } // end of namespace Ice |
OLD | NEW |