Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(35)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 412353005: Use movss to implement insertelement when elements = 4 and index = 0. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 2147 matching lines...) Expand 10 before | Expand all | Expand 10 after
2158 // TODO(wala): Determine the best lowering sequences for each type. 2158 // TODO(wala): Determine the best lowering sequences for each type.
2159 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2159 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2160 // Lower extractelement operations where the element is 32 bits 2160 // Lower extractelement operations where the element is 32 bits
2161 // wide with pshufd. 2161 // wide with pshufd.
2162 // TODO(wala): SSE4.1 has extractps and pextrd 2162 // TODO(wala): SSE4.1 has extractps and pextrd
2163 // 2163 //
2164 // ALIGNHACK: Force vector operands to registers in instructions that 2164 // ALIGNHACK: Force vector operands to registers in instructions that
2165 // require aligned memory operands until support for stack alignment 2165 // require aligned memory operands until support for stack alignment
2166 // is implemented. 2166 // is implemented.
2167 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) 2167 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2168 Operand *T = NULL; 2168 Variable *T = NULL;
2169 if (Index) { 2169 if (Index) {
2170 // The shuffle only needs to occur if the element to be extracted 2170 // The shuffle only needs to occur if the element to be extracted
2171 // is not at the lowest index. 2171 // is not at the lowest index.
2172 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2172 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2173 T = makeReg(Ty); 2173 T = makeReg(Ty);
2174 _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask); 2174 _pshufd(T, ALIGN_HACK(SourceVectOperand), Mask);
2175 } else { 2175 } else {
2176 // TODO(wala): If SourceVectOperand is in memory, express it as
2177 // mem32 so that the call to legalizeToVar() is made unnecessary.
2178 // _movd and _movss only take mem32 memory operands.
2179 T = legalizeToVar(SourceVectOperand); 2176 T = legalizeToVar(SourceVectOperand);
2180 } 2177 }
2181 2178
2182 if (InVectorElementTy == IceType_i32) { 2179 if (InVectorElementTy == IceType_i32) {
2183 _movd(ExtractedElement, T); 2180 _movd(ExtractedElement, T);
2184 } else { // InVectorElementTy == IceType_f32 2181 } else { // InVectorElementTy == IceType_f32
2185 // TODO: _mov should be able to be used here. 2182 // TODO(wala): _movss is only used here because _mov does not
2183 // allow a vector source and a scalar destination. _mov should be
2184 // able to be used here.
2185 // _movss is a binary instruction, so the FakeDef is needed to
2186 // keep the live range analysis consistent.
2187 Context.insert(InstFakeDef::create(Func, ExtractedElement));
2186 _movss(ExtractedElement, T); 2188 _movss(ExtractedElement, T);
2187 } 2189 }
2188 #undef ALIGN_HACK 2190 #undef ALIGN_HACK
2189 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { 2191 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
2190 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2192 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2191 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask); 2193 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
2192 } else { 2194 } else {
2193 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2195 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2194 // Spill the value to a stack slot and do the extraction in memory. 2196 // Spill the value to a stack slot and do the extraction in memory.
2195 // TODO(wala): SSE4.1 has pextrb. 2197 // TODO(wala): SSE4.1 has pextrb.
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after
2514 } 2516 }
2515 2517
2516 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { 2518 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
2517 Operand *SourceVectOperand = Inst->getSrc(0); 2519 Operand *SourceVectOperand = Inst->getSrc(0);
2518 Operand *ElementToInsert = Inst->getSrc(1); 2520 Operand *ElementToInsert = Inst->getSrc(1);
2519 ConstantInteger *ElementIndex = 2521 ConstantInteger *ElementIndex =
2520 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); 2522 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
2521 // Only constant indices are allowed in PNaCl IR. 2523 // Only constant indices are allowed in PNaCl IR.
2522 assert(ElementIndex); 2524 assert(ElementIndex);
2523 unsigned Index = ElementIndex->getValue(); 2525 unsigned Index = ElementIndex->getValue();
2526 assert(Index < typeNumElements(SourceVectOperand->getType()));
2524 2527
2525 Type Ty = SourceVectOperand->getType(); 2528 Type Ty = SourceVectOperand->getType();
2526 Type ElementTy = typeElementType(Ty); 2529 Type ElementTy = typeElementType(Ty);
2527 Type InVectorElementTy = getInVectorElementType(Ty); 2530 Type InVectorElementTy = getInVectorElementType(Ty);
2528 2531
2529 if (ElementTy == IceType_i1) { 2532 if (ElementTy == IceType_i1) {
2530 // Expand the element to the appropriate size for it to be inserted 2533 // Expand the element to the appropriate size for it to be inserted
2531 // in the vector. 2534 // in the vector.
2532 Variable *Expanded = 2535 Variable *Expanded =
2533 Func->makeVariable(InVectorElementTy, Context.getNode()); 2536 Func->makeVariable(InVectorElementTy, Context.getNode());
2534 InstCast *Cast = 2537 InstCast *Cast =
2535 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert); 2538 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
2536 lowerCast(Cast); 2539 lowerCast(Cast);
2537 ElementToInsert = Expanded; 2540 ElementToInsert = Expanded;
2538 } 2541 }
2539 2542
2540 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2543 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2541 // Lower insertelement with 32-bit wide elements using shufps. 2544 // Lower insertelement with 32-bit wide elements using shufps or
2545 // movss.
2542 // TODO(wala): SSE4.1 has pinsrd and insertps. 2546 // TODO(wala): SSE4.1 has pinsrd and insertps.
2543 Variable *Element = NULL; 2547 Variable *Element = NULL;
2544 if (InVectorElementTy == IceType_f32) { 2548 if (InVectorElementTy == IceType_f32) {
2545 // Element will be in an XMM register since it is floating point. 2549 // Element will be in an XMM register since it is floating point.
2546 Element = legalizeToVar(ElementToInsert); 2550 Element = legalizeToVar(ElementToInsert);
2547 } else { 2551 } else {
2548 // Copy an integer to an XMM register. 2552 // Copy an integer to an XMM register.
2549 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem); 2553 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
2550 Element = makeReg(Ty); 2554 Element = makeReg(Ty);
2551 _movd(Element, T); 2555 _movd(Element, T);
2552 } 2556 }
2553 2557
2558 if (Index == 0) {
2559 Variable *T = makeReg(Ty);
2560 _movp(T, SourceVectOperand);
2561 _movss(T, Element);
2562 _movp(Inst->getDest(), T);
2563 return;
2564 }
2565
2554 // shufps treats the source and destination operands as vectors of 2566 // shufps treats the source and destination operands as vectors of
2555 // four doublewords. The destination's two high doublewords are 2567 // four doublewords. The destination's two high doublewords are
2556 // selected from the source operand and the two low doublewords are 2568 // selected from the source operand and the two low doublewords are
2557 // selected from the (original value of) the destination operand. 2569 // selected from the (original value of) the destination operand.
2558 // An insertelement operation can be effected with a sequence of two 2570 // An insertelement operation can be effected with a sequence of two
2559 // shufps operations with appropriate masks. In all cases below, 2571 // shufps operations with appropriate masks. In all cases below,
2560 // Element[0] is being inserted into SourceVectOperand. Indices are 2572 // Element[0] is being inserted into SourceVectOperand. Indices are
2561 // ordered from left to right. 2573 // ordered from left to right.
2562 // 2574 //
2563 // insertelement into index 0 (result is stored in Element):
2564 // Element := Element[0, 0] SourceVectOperand[0, 1]
2565 // Element := Element[0, 3] SourceVectOperand[2, 3]
2566 //
2567 // insertelement into index 1 (result is stored in Element): 2575 // insertelement into index 1 (result is stored in Element):
2568 // Element := Element[0, 0] SourceVectOperand[0, 0] 2576 // Element := Element[0, 0] SourceVectOperand[0, 0]
2569 // Element := Element[3, 0] SourceVectOperand[2, 3] 2577 // Element := Element[3, 0] SourceVectOperand[2, 3]
2570 // 2578 //
2571 // insertelement into index 2 (result is stored in T): 2579 // insertelement into index 2 (result is stored in T):
2572 // T := SourceVectOperand 2580 // T := SourceVectOperand
2573 // Element := Element[0, 0] T[0, 3] 2581 // Element := Element[0, 0] T[0, 3]
2574 // T := T[0, 1] Element[0, 3] 2582 // T := T[0, 1] Element[0, 3]
2575 // 2583 //
2576 // insertelement into index 3 (result is stored in T): 2584 // insertelement into index 3 (result is stored in T):
2577 // T := SourceVectOperand 2585 // T := SourceVectOperand
2578 // Element := Element[0, 0] T[0, 2] 2586 // Element := Element[0, 0] T[0, 2]
2579 // T := T[0, 1] Element[3, 0] 2587 // T := T[0, 1] Element[3, 0]
2580 const unsigned char Mask1[4] = {64, 0, 192, 128}; 2588 const unsigned char Mask1[3] = {0, 192, 128};
2581 const unsigned char Mask2[4] = {236, 227, 196, 52}; 2589 const unsigned char Mask2[3] = {227, 196, 52};
2582 2590
2583 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]); 2591 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
2584 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]); 2592 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
2585 2593
2586 // ALIGNHACK: Force vector operands to registers in instructions that 2594 // ALIGNHACK: Force vector operands to registers in instructions that
2587 // require aligned memory operands until support for stack alignment 2595 // require aligned memory operands until support for stack alignment
2588 // is implemented. 2596 // is implemented.
2589 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) 2597 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2590 if (Index < 2) { 2598 if (Index == 1) {
2591 SourceVectOperand = ALIGN_HACK(SourceVectOperand); 2599 SourceVectOperand = ALIGN_HACK(SourceVectOperand);
2592 _shufps(Element, SourceVectOperand, Mask1Constant); 2600 _shufps(Element, SourceVectOperand, Mask1Constant);
2593 _shufps(Element, SourceVectOperand, Mask2Constant); 2601 _shufps(Element, SourceVectOperand, Mask2Constant);
2594 _movp(Inst->getDest(), Element); 2602 _movp(Inst->getDest(), Element);
2595 } else { 2603 } else {
2596 Variable *T = makeReg(Ty); 2604 Variable *T = makeReg(Ty);
2597 _movp(T, SourceVectOperand); 2605 _movp(T, SourceVectOperand);
2598 _shufps(Element, T, Mask1Constant); 2606 _shufps(Element, T, Mask1Constant);
2599 _shufps(T, Element, Mask2Constant); 2607 _shufps(T, Element, Mask2Constant);
2600 _movp(Inst->getDest(), T); 2608 _movp(Inst->getDest(), T);
(...skipping 1409 matching lines...) Expand 10 before | Expand all | Expand 10 after
4010 for (SizeT i = 0; i < Size; ++i) { 4018 for (SizeT i = 0; i < Size; ++i) {
4011 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4019 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4012 } 4020 }
4013 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4021 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4014 } 4022 }
4015 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4023 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
4016 << "\n"; 4024 << "\n";
4017 } 4025 }
4018 4026
4019 } // end of namespace Ice 4027 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698