Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(647)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 401523003: Lower insertelement and extractelement. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Rebase Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-ops.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
78 #undef X 78 #undef X
79 }; 79 };
80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); 80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81 81
82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { 82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond); 83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size); 84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping; 85 return TableIcmp32[Index].Mapping;
86 } 86 }
87 87
// Table with one entry per row of ICETYPEX8632_TABLE. The X macro below keeps
// only the `elementty` column of each row, stored as InVectorElementType.
88 const struct TableTypeX8632Attributes_ {
89 Type InVectorElementType;
90 } TableTypeX8632Attributes[] = {
91 #define X(tag, elementty, cvt, sdss, pack, width) \
92 { elementty } \
93 ,
94 ICETYPEX8632_TABLE
95 #undef X
96 };
// Number of entries in TableTypeX8632Attributes.
97 const size_t TableTypeX8632AttributesSize =
98 llvm::array_lengthof(TableTypeX8632Attributes);
99
100 // Return the type which the elements of the vector have in the X86
101 // representation of the vector.
// The result is read from TableTypeX8632Attributes. Ty must be a vector type
// and its enum value must be a valid index into that table; both conditions
// are checked only with assert().
102 Type getInVectorElementType(Type Ty) {
103 assert(isVectorType(Ty));
104 size_t Index = static_cast<size_t>(Ty);
105 assert(Index < TableTypeX8632AttributesSize);
// NOTE(review): the lookup subscripts with Ty rather than the bounds-checked
// Index (the same value converted to size_t), so Index is only used by the
// assert above.
106 return TableTypeX8632Attributes[Ty].InVectorElementType;
107 }
108
88 // The maximum number of arguments to pass in XMM registers 109 // The maximum number of arguments to pass in XMM registers
89 const unsigned X86_MAX_XMM_ARGS = 4; 110 const unsigned X86_MAX_XMM_ARGS = 4;
90 // The number of bits in a byte 111 // The number of bits in a byte
91 const unsigned X86_CHAR_BIT = 8; 112 const unsigned X86_CHAR_BIT = 8;
92 113
93 // Return a string representation of the type that is suitable for use 114 // Return a string representation of the type that is suitable for use
94 // in an identifier. 115 // in an identifier.
95 IceString typeIdentString(const Type Ty) { 116 IceString typeIdentString(const Type Ty) {
96 IceString Str; 117 IceString Str;
97 llvm::raw_string_ostream BaseOS(Str); 118 llvm::raw_string_ostream BaseOS(Str);
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
166 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); 187 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
167 ICEINSTICMP_TABLE; 188 ICEINSTICMP_TABLE;
168 #undef X 189 #undef X
169 } 190 }
170 191
171 // Validate the enum values in ICETYPEX8632_TABLE. 192 // Validate the enum values in ICETYPEX8632_TABLE.
172 { 193 {
173 // Define a temporary set of enum values based on low-level 194 // Define a temporary set of enum values based on low-level
174 // table entries. 195 // table entries.
175 enum _tmp_enum { 196 enum _tmp_enum {
176 #define X(tag, cvt, sdss, pack, width) _tmp_##tag, 197 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
177 ICETYPEX8632_TABLE 198 ICETYPEX8632_TABLE
178 #undef X 199 #undef X
179 _num 200 _num
180 }; 201 };
181 // Define a set of constants based on high-level table entries. 202 // Define a set of constants based on high-level table entries.
182 #define X(tag, size, align, elts, elty, str) \ 203 #define X(tag, size, align, elts, elty, str) \
183 static const int _table1_##tag = tag; 204 static const int _table1_##tag = tag;
184 ICETYPE_TABLE; 205 ICETYPE_TABLE;
185 #undef X 206 #undef X
186 // Define a set of constants based on low-level table entries, 207 // Define a set of constants based on low-level table entries,
187 // and ensure the table entry keys are consistent. 208 // and ensure the table entry keys are consistent.
188 #define X(tag, cvt, sdss, pack, width) \ 209 #define X(tag, elementty, cvt, sdss, pack, width) \
189 static const int _table2_##tag = _tmp_##tag; \ 210 static const int _table2_##tag = _tmp_##tag; \
190 STATIC_ASSERT(_table1_##tag == _table2_##tag); 211 STATIC_ASSERT(_table1_##tag == _table2_##tag);
191 ICETYPEX8632_TABLE; 212 ICETYPEX8632_TABLE;
192 #undef X 213 #undef X
193 // Repeat the static asserts with respect to the high-level 214 // Repeat the static asserts with respect to the high-level
194 // table entries in case the high-level table has extra entries. 215 // table entries in case the high-level table has extra entries.
195 #define X(tag, size, align, elts, elty, str) \ 216 #define X(tag, size, align, elts, elty, str) \
196 STATIC_ASSERT(_table1_##tag == _table2_##tag); 217 STATIC_ASSERT(_table1_##tag == _table2_##tag);
197 ICETYPE_TABLE; 218 ICETYPE_TABLE;
198 #undef X 219 #undef X
(...skipping 1901 matching lines...) Expand 10 before | Expand all | Expand 10 after
2100 case IceType_v4i32: 2121 case IceType_v4i32:
2101 case IceType_v4f32: { 2122 case IceType_v4f32: {
2102 _movp(Dest, legalizeToVar(Src0)); 2123 _movp(Dest, legalizeToVar(Src0));
2103 } break; 2124 } break;
2104 } 2125 }
2105 break; 2126 break;
2106 } 2127 }
2107 } 2128 }
2108 } 2129 }
2109 2130
// Lowers an extractelement instruction: reads the element at a constant index
// (Src 1) of the source vector (Src 0) and moves it into the instruction's
// Dest. The sequence depends on the vector type:
//   - v4i32 / v4f32 / v4i1:  pshufd (skipped for index 0), then movd or movss.
//   - v8i16 / v8i1:          pextrw.
//   - v16i8 / v16i1:         copy the vector to a stack slot and load the
//                            element from memory.
// For vectors of i1 the extracted value is then truncated to i1.
2131 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
2132 Operand *SourceVectOperand = Inst->getSrc(0);
2133 ConstantInteger *ElementIndex =
2134 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
2135 // Only constant indices are allowed in PNaCl IR.
// NOTE(review): the null check is an assert only; if assert() is compiled out
// and the index is not a ConstantInteger, the getValue() call below would
// dereference a null pointer.
2136 assert(ElementIndex);
2137
2138 unsigned Index = ElementIndex->getValue();
2139 Type Ty = SourceVectOperand->getType();
2140 Type ElementTy = typeElementType(Ty);
2141 Type InVectorElementTy = getInVectorElementType(Ty);
2142 Variable *ExtractedElement = makeReg(InVectorElementTy);
2143
2144 // TODO(wala): Determine the best lowering sequences for each type.
2145 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2146 // Lower extractelement operations where the element is 32 bits
2147 // wide with pshufd.
2148 // TODO(wala): SSE4.1 has extractps and pextrd
2149 //
2150 // ALIGNHACK: Force vector operands to registers in instructions that
2151 // require aligned memory operands until support for stack alignment
2152 // is implemented.
2153 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2154 Operand *T = NULL;
2155 if (Index) {
2156 // The shuffle only needs to occur if the element to be extracted
2157 // is not at the lowest index.
// Index itself is used as the pshufd immediate; for Index in 1..3 its low two
// bits select source element Index for the lowest lane of T, which is the
// lane that movd/movss read below.
2158 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2159 T = makeReg(Ty);
2160 _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
2161 } else {
2162 // TODO(wala): If SourceVectOperand is in memory, express it as
2163 // mem32 so that the call to legalizeToVar() is made unnecessary.
2164 // _movd and _movss only take mem32 memory operands.
2165 T = legalizeToVar(SourceVectOperand);
2166 }
2167
2168 if (InVectorElementTy == IceType_i32) {
2169 _movd(ExtractedElement, T);
2170 } else { // InVectorElementTy == IceType_f32
2171 // TODO: _mov should be able to be used here.
2172 _movss(ExtractedElement, T);
2173 }
2174 #undef ALIGN_HACK
2175 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
// pextrw takes the element index directly as its immediate operand.
2176 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2177 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
2178 } else {
2179 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2180 // Spill the value to a stack slot and do the extraction in memory.
2181 // TODO(wala): SSE4.1 has pextrb.
2182 //
2183 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
2184 // support for legalizing to mem is implemented.
// The slot gets zero register weight, which getMemoryOperandForStackSlot()
// asserts on.
2185 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2186 Slot->setWeight(RegWeight::Zero);
2187 _movp(Slot, legalizeToVar(SourceVectOperand));
2188
2189 // Compute the location of the element in memory.
2190 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2191 OperandX8632Mem *Loc =
2192 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2193 _mov(ExtractedElement, Loc);
2194 }
2195
2196 if (ElementTy == IceType_i1) {
2197 // Truncate extracted integers to i1s if necessary.
2198 Variable *T = makeReg(IceType_i1);
2199 InstCast *Cast =
2200 InstCast::create(Func, InstCast::Trunc, T, ExtractedElement);
2201 lowerCast(Cast);
2202 ExtractedElement = T;
2203 }
2204
2205 // Copy the element to the destination.
2206 Variable *Dest = Inst->getDest();
2207 _mov(Dest, ExtractedElement);
2208 }
2209
2110 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { 2210 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
2111 Operand *Src0 = Inst->getSrc(0); 2211 Operand *Src0 = Inst->getSrc(0);
2112 Operand *Src1 = Inst->getSrc(1); 2212 Operand *Src1 = Inst->getSrc(1);
2113 Variable *Dest = Inst->getDest(); 2213 Variable *Dest = Inst->getDest();
2114 // Lowering a = fcmp cond, b, c 2214 // Lowering a = fcmp cond, b, c
2115 // ucomiss b, c /* only if C1 != Br_None */ 2215 // ucomiss b, c /* only if C1 != Br_None */
2116 // /* but swap b,c order if SwapOperands==true */ 2216 // /* but swap b,c order if SwapOperands==true */
2117 // mov a, <default> 2217 // mov a, <default>
2118 // j<C1> label /* only if C1 != Br_None */ 2218 // j<C1> label /* only if C1 != Br_None */
2119 // j<C2> label /* only if C2 != Br_None */ 2219 // j<C2> label /* only if C2 != Br_None */
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
2231 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); 2331 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
2232 InstX8632Label *Label = InstX8632Label::create(Func, this); 2332 InstX8632Label *Label = InstX8632Label::create(Func, this);
2233 _cmp(Src0New, Src1); 2333 _cmp(Src0New, Src1);
2234 _mov(Dest, One); 2334 _mov(Dest, One);
2235 _br(getIcmp32Mapping(Inst->getCondition()), Label); 2335 _br(getIcmp32Mapping(Inst->getCondition()), Label);
2236 Context.insert(InstFakeUse::create(Func, Dest)); 2336 Context.insert(InstFakeUse::create(Func, Dest));
2237 _mov(Dest, Zero); 2337 _mov(Dest, Zero);
2238 Context.insert(Label); 2338 Context.insert(Label);
2239 } 2339 }
2240 2340
// Lowers an insertelement instruction: writes to the instruction's Dest a copy
// of the source vector (Src 0) in which the element at a constant index
// (Src 2) is replaced by the scalar Src 1. An i1 element is first
// zero-extended to the in-vector element type. The sequence depends on the
// vector type:
//   - v4i32 / v4f32 / v4i1:  two shufps instructions (masks derived below).
//   - v8i16 / v8i1:          pinsrw.
//   - v16i8 / v16i1:         copy the vector to a stack slot, store the
//                            element into it, and reload the vector.
2341 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
2342 Operand *SourceVectOperand = Inst->getSrc(0);
2343 Operand *ElementToInsert = Inst->getSrc(1);
2344 ConstantInteger *ElementIndex =
2345 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
2346 // Only constant indices are allowed in PNaCl IR.
// NOTE(review): the null check is an assert only; if assert() is compiled out
// and the index is not a ConstantInteger, the getValue() call below would
// dereference a null pointer.
2347 assert(ElementIndex);
2348 unsigned Index = ElementIndex->getValue();
2349
2350 Type Ty = SourceVectOperand->getType();
2351 Type ElementTy = typeElementType(Ty);
2352 Type InVectorElementTy = getInVectorElementType(Ty);
2353
2354 if (ElementTy == IceType_i1) {
2355 // Expand the element to the appropriate size for it to be inserted
2356 // in the vector.
2357 Variable *Expanded =
2358 Func->makeVariable(InVectorElementTy, Context.getNode());
2359 InstCast *Cast =
2360 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
2361 lowerCast(Cast);
2362 ElementToInsert = Expanded;
2363 }
2364
2365 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2366 // Lower insertelement with 32-bit wide elements using shufps.
2367 // TODO(wala): SSE4.1 has pinsrd and insertps.
2368 Variable *Element = NULL;
2369 if (InVectorElementTy == IceType_f32) {
2370 // Element will be in an XMM register since it is floating point.
2371 Element = legalizeToVar(ElementToInsert);
2372 } else {
2373 // Copy an integer to an XMM register.
2374 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
2375 Element = makeReg(Ty);
2376 _movd(Element, T);
2377 }
2378
2379 // shufps treats the source and destination operands as vectors of
2380 // four doublewords. The destination's two high doublewords are
2381 // selected from the source operand and the two low doublewords are
2382 // selected from the (original value of) the destination operand.
2383 // An insertelement operation can be effected with a sequence of two
2384 // shufps operations with appropriate masks. In all cases below,
2385 // Element[0] is being inserted into SourceVectOperand. Indices are
2386 // ordered from left to right.
2387 //
2388 // insertelement into index 0 (result is stored in Element):
2389 // Element := Element[0, 0] SourceVectOperand[0, 1]
2390 // Element := Element[0, 3] SourceVectOperand[2, 3]
2391 //
2392 // insertelement into index 1 (result is stored in Element):
2393 // Element := Element[0, 0] SourceVectOperand[0, 0]
2394 // Element := Element[3, 0] SourceVectOperand[2, 3]
2395 //
2396 // insertelement into index 2 (result is stored in T):
2397 // T := SourceVectOperand
2398 // Element := Element[0, 0] T[0, 3]
2399 // T := T[0, 1] Element[0, 3]
2400 //
2401 // insertelement into index 3 (result is stored in T):
2402 // T := SourceVectOperand
2403 // Element := Element[0, 0] T[0, 2]
2404 // T := T[0, 1] Element[3, 0]
// Mask1[i] / Mask2[i] are the immediates of the first / second shufps for
// insertion index i. Each immediate packs four 2-bit lane selectors, lowest
// result lane in the lowest bits (e.g. 236 == 0b11'10'11'00 selects
// [0, 3] from the destination and [2, 3] from the source).
// NOTE(review): Index subscripts these 4-entry tables unchecked; nothing
// visible in this function asserts Index < 4.
2405 const unsigned char Mask1[4] = {64, 0, 192, 128};
2406 const unsigned char Mask2[4] = {236, 227, 196, 52};
2407
2408 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
2409 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);
2410
2411 // ALIGNHACK: Force vector operands to registers in instructions that
2412 // require aligned memory operands until support for stack alignment
2413 // is implemented.
2414 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2415 if (Index < 2) {
// Indices 0 and 1: both shuffles write Element, which ends up holding the
// result vector.
2416 SourceVectOperand = ALIGN_HACK(SourceVectOperand);
2417 _shufps(Element, SourceVectOperand, Mask1Constant);
2418 _shufps(Element, SourceVectOperand, Mask2Constant);
2419 _movp(Inst->getDest(), Element);
2420 } else {
// Indices 2 and 3: the result is assembled in T, a register copy of the
// source vector.
2421 Variable *T = makeReg(Ty);
2422 _movp(T, SourceVectOperand);
2423 _shufps(Element, T, Mask1Constant);
2424 _shufps(T, Element, Mask2Constant);
2425 _movp(Inst->getDest(), T);
2426 }
2427 #undef ALIGN_HACK
2428 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
// Copy the source vector into T, overwrite one word with pinsrw (the element
// index is the immediate), then copy T to Dest.
2429 Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
2430 Variable *T = makeReg(Ty);
2431 _movp(T, SourceVectOperand);
2432 _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
2433 _movp(Inst->getDest(), T);
2434 } else {
2435 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2436 // Spill the value to a stack slot and perform the insertion in
2437 // memory.
2438 // TODO(wala): SSE4.1 has pinsrb.
2439 //
2440 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
2441 // support for legalizing to mem is implemented.
// The slot gets zero register weight, which getMemoryOperandForStackSlot()
// asserts on.
2442 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2443 Slot->setWeight(RegWeight::Zero);
2444 _movp(Slot, legalizeToVar(SourceVectOperand));
2445
2446 // Compute the location of the position to insert in memory.
2447 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2448 OperandX8632Mem *Loc =
2449 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2450 _store(legalizeToVar(ElementToInsert), Loc);
2451
// Reload the modified vector from the slot and copy it to Dest.
2452 Variable *T = makeReg(Ty);
2453 _movp(T, Slot);
2454 _movp(Inst->getDest(), T);
2455 }
2456 }
2457
2241 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2458 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2242 switch (Instr->getIntrinsicInfo().ID) { 2459 switch (Instr->getIntrinsicInfo().ID) {
2243 case Intrinsics::AtomicCmpxchg: { 2460 case Intrinsics::AtomicCmpxchg: {
2244 if (!Intrinsics::VerifyMemoryOrder( 2461 if (!Intrinsics::VerifyMemoryOrder(
2245 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 2462 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
2246 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); 2463 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
2247 return; 2464 return;
2248 } 2465 }
2249 if (!Intrinsics::VerifyMemoryOrder( 2466 if (!Intrinsics::VerifyMemoryOrder(
2250 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 2467 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
(...skipping 911 matching lines...) Expand 10 before | Expand all | Expand 10 after
3162 // this value is initialized using register operations. 3379 // this value is initialized using register operations.
3163 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 3380 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
3164 Variable *MinusOne = makeReg(Ty); 3381 Variable *MinusOne = makeReg(Ty);
3165 // Insert a FakeDef so the live range of MinusOne is not overestimated. 3382 // Insert a FakeDef so the live range of MinusOne is not overestimated.
3166 Context.insert(InstFakeDef::create(Func, MinusOne)); 3383 Context.insert(InstFakeDef::create(Func, MinusOne));
3167 _pcmpeq(MinusOne, MinusOne); 3384 _pcmpeq(MinusOne, MinusOne);
3168 _psub(Dest, MinusOne); 3385 _psub(Dest, MinusOne);
3169 return Dest; 3386 return Dest;
3170 } 3387 }
3171 3388
// Builds a memory operand of type Ty that addresses the stack-slot variable
// Slot displaced by the constant Offset (the callers in this file pass a byte
// offset). Emits an lea that puts Slot's address into a fresh i32 register,
// which becomes the base of the returned operand.
3389 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
3390 Variable *Slot,
3391 uint32_t Offset) {
3392 // Ensure that Slot is a stack slot: it must have zero register weight
// and no register assigned.
3393 assert(Slot->getWeight() == RegWeight::Zero);
3394 assert(Slot->getRegNum() == Variable::NoRegister);
3395 // Compute the address of Slot into the register Loc.
3396 // TODO(wala,stichnot): lea should not be required. The address of
3397 // the stack slot is known at compile time (although not until after
3398 // addProlog()).
3399 const Type PointerType = IceType_i32;
3400 Variable *Loc = makeReg(PointerType);
3401 _lea(Loc, Slot);
3402 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
3403 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
3404 }
3405
3172 // Helper for legalize() to emit the right code to lower an operand to a 3406 // Helper for legalize() to emit the right code to lower an operand to a
3173 // register of the appropriate type. 3407 // register of the appropriate type.
3174 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 3408 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
3175 Type Ty = Src->getType(); 3409 Type Ty = Src->getType();
3176 Variable *Reg = makeReg(Ty, RegNum); 3410 Variable *Reg = makeReg(Ty, RegNum);
3177 if (isVectorType(Ty)) { 3411 if (isVectorType(Ty)) {
3178 _movp(Reg, Src); 3412 _movp(Reg, Src);
3179 } else { 3413 } else {
3180 _mov(Reg, Src); 3414 _mov(Reg, Src);
3181 } 3415 }
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
3483 for (SizeT i = 0; i < Size; ++i) { 3717 for (SizeT i = 0; i < Size; ++i) {
3484 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3718 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3485 } 3719 }
3486 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3720 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3487 } 3721 }
3488 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3722 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3489 << "\n"; 3723 << "\n";
3490 } 3724 }
3491 3725
3492 } // end of namespace Ice 3726 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/vector-ops.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698