Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 401523003: Lower insertelement and extractelement. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Use a forward declaration to name i1 vector types in test_vector_ops_main.cpp Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
78 #undef X 78 #undef X
79 }; 79 };
80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); 80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81 81
82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { 82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond); 83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size); 84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping; 85 return TableIcmp32[Index].Mapping;
86 } 86 }
87 87
88 const struct TableTypeX8632Attributes_ {
89 Type InVectorElementType;
90 } TableTypeX8632Attributes[] = {
91 #define X(tag, elementty, cvt, sdss, pack, width) \
92 { elementty } \
93 ,
94 ICETYPEX8632_TABLE
95 #undef X
96 };
97 const size_t TableTypeX8632AttributesSize =
98 llvm::array_lengthof(TableTypeX8632Attributes);
99
100 // Return the type which the elements of the vector have in the X86
101 // representation of the vector.
102 Type getInVectorElementType(Type Ty) {
103 assert(isVectorType(Ty));
104 size_t Index = static_cast<size_t>(Ty);
105 assert(Index < TableTypeX8632AttributesSize);
106 return TableTypeX8632Attributes[Ty].InVectorElementType;
107 }
108
88 // The maximum number of arguments to pass in XMM registers 109 // The maximum number of arguments to pass in XMM registers
89 const unsigned X86_MAX_XMM_ARGS = 4; 110 const unsigned X86_MAX_XMM_ARGS = 4;
90 // The number of bits in a byte 111 // The number of bits in a byte
91 const unsigned X86_CHAR_BIT = 8; 112 const unsigned X86_CHAR_BIT = 8;
92 113
93 // In some cases, there are x-macros tables for both high-level and 114 // In some cases, there are x-macros tables for both high-level and
94 // low-level instructions/operands that use the same enum key value. 115 // low-level instructions/operands that use the same enum key value.
95 // The tables are kept separate to maintain a proper separation 116 // The tables are kept separate to maintain a proper separation
96 // between abstraction layers. There is a risk that the tables 117 // between abstraction layers. There is a risk that the tables
97 // could get out of sync if enum values are reordered or if entries 118 // could get out of sync if enum values are reordered or if entries
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
152 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); 173 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
153 ICEINSTICMP_TABLE; 174 ICEINSTICMP_TABLE;
154 #undef X 175 #undef X
155 } 176 }
156 177
157 // Validate the enum values in ICETYPEX8632_TABLE. 178 // Validate the enum values in ICETYPEX8632_TABLE.
158 { 179 {
159 // Define a temporary set of enum values based on low-level 180 // Define a temporary set of enum values based on low-level
160 // table entries. 181 // table entries.
161 enum _tmp_enum { 182 enum _tmp_enum {
162 #define X(tag, cvt, sdss, pack, width) _tmp_##tag, 183 #define X(tag, elementty, cvt, sdss, pack, width) _tmp_##tag,
163 ICETYPEX8632_TABLE 184 ICETYPEX8632_TABLE
164 #undef X 185 #undef X
165 _num 186 _num
166 }; 187 };
167 // Define a set of constants based on high-level table entries. 188 // Define a set of constants based on high-level table entries.
168 #define X(tag, size, align, elts, elty, str) \ 189 #define X(tag, size, align, elts, elty, str) \
169 static const int _table1_##tag = tag; 190 static const int _table1_##tag = tag;
170 ICETYPE_TABLE; 191 ICETYPE_TABLE;
171 #undef X 192 #undef X
172 // Define a set of constants based on low-level table entries, 193 // Define a set of constants based on low-level table entries,
173 // and ensure the table entry keys are consistent. 194 // and ensure the table entry keys are consistent.
174 #define X(tag, cvt, sdss, pack, width) \ 195 #define X(tag, elementty, cvt, sdss, pack, width) \
175 static const int _table2_##tag = _tmp_##tag; \ 196 static const int _table2_##tag = _tmp_##tag; \
176 STATIC_ASSERT(_table1_##tag == _table2_##tag); 197 STATIC_ASSERT(_table1_##tag == _table2_##tag);
177 ICETYPEX8632_TABLE; 198 ICETYPEX8632_TABLE;
178 #undef X 199 #undef X
179 // Repeat the static asserts with respect to the high-level 200 // Repeat the static asserts with respect to the high-level
180 // table entries in case the high-level table has extra entries. 201 // table entries in case the high-level table has extra entries.
181 #define X(tag, size, align, elts, elty, str) \ 202 #define X(tag, size, align, elts, elty, str) \
182 STATIC_ASSERT(_table1_##tag == _table2_##tag); 203 STATIC_ASSERT(_table1_##tag == _table2_##tag);
183 ICETYPE_TABLE; 204 ICETYPE_TABLE;
184 #undef X 205 #undef X
(...skipping 1753 matching lines...) Expand 10 before | Expand all | Expand 10 after
1938 case IceType_v4i32: 1959 case IceType_v4i32:
1939 case IceType_v4f32: { 1960 case IceType_v4f32: {
1940 _movp(Dest, legalizeToVar(Src0)); 1961 _movp(Dest, legalizeToVar(Src0));
1941 } break; 1962 } break;
1942 } 1963 }
1943 break; 1964 break;
1944 } 1965 }
1945 } 1966 }
1946 } 1967 }
1947 1968
1969 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
1970 Operand *SourceVectOperand = Inst->getSrc(0);
1971 ConstantInteger *ElementIndex =
1972 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
1973 // Only constant indices are allowed in PNaCl IR.
1974 assert(ElementIndex);
1975
1976 unsigned Index = ElementIndex->getValue();
1977 Type Ty = SourceVectOperand->getType();
1978 Type ElementTy = typeElementType(Ty);
1979 Type InVectorElementTy = getInVectorElementType(Ty);
1980 Variable *ExtractedElement = makeReg(InVectorElementTy);
1981
1982 // TODO(wala): Determine the best lowering sequences for each type.
1983 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
1984 // Lower extractelement operations where the element is 32 bits
1985 // wide with pshufd.
1986 // TODO(wala): SSE4.1 has extractps and pextrd
1987 //
1988 // ALIGNHACK: Force vector operands to registers in instructions that
1989 // require aligned memory operands until support for stack alignment
1990 // is implemented.
1991 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
1992 Operand *T = NULL;
1993 if (Index) {
1994 // The shuffle only needs to occur if the element to be extracted
1995 // is not at the lowest index.
1996 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
1997 T = makeReg(Ty);
1998 _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
1999 } else {
2000 // TODO(wala): If SourceVectOperand is in memory, express it as
2001 // mem32 so that the call to legalizeToVar() is made unnecessary.
2002 // _movd and _movss only take mem32 memory operands.
2003 T = legalizeToVar(SourceVectOperand);
2004 }
2005
2006 if (InVectorElementTy == IceType_i32) {
2007 _movd(ExtractedElement, T);
2008 } else { // InVectorElementTy == IceType_f32
2009 _movss(ExtractedElement, T);
2010 }
2011 #undef ALIGN_HACK
2012 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
2013 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2014 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
2015 } else {
2016 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2017 // Spill the value to a stack slot and do the extraction in memory.
2018 // TODO(wala): SSE4.1 has pextrb.
2019 //
2020 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
2021 // support for legalizing to mem is implemented.
2022 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2023 Slot->setWeight(RegWeight::Zero);
2024 _movp(Slot, legalizeToVar(SourceVectOperand));
2025
2026 // Compute the location of the element in memory.
2027 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2028 OperandX8632Mem *Loc =
2029 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2030 _mov(ExtractedElement, Loc);
2031 }
2032
2033 if (ElementTy == IceType_i1) {
2034 // Truncate extracted integers to i1s if necessary.
2035 Variable *T = makeReg(IceType_i1);
2036 InstCast *Cast =
2037 InstCast::create(Func, InstCast::Trunc, T, ExtractedElement);
2038 lowerCast(Cast);
2039 ExtractedElement = T;
2040 }
2041
2042 // Copy the element to the destination.
2043 Variable *Dest = Inst->getDest();
2044 _mov(Dest, ExtractedElement);
2045 }
2046
1948 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { 2047 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
1949 Operand *Src0 = Inst->getSrc(0); 2048 Operand *Src0 = Inst->getSrc(0);
1950 Operand *Src1 = Inst->getSrc(1); 2049 Operand *Src1 = Inst->getSrc(1);
1951 Variable *Dest = Inst->getDest(); 2050 Variable *Dest = Inst->getDest();
1952 // Lowering a = fcmp cond, b, c 2051 // Lowering a = fcmp cond, b, c
1953 // ucomiss b, c /* only if C1 != Br_None */ 2052 // ucomiss b, c /* only if C1 != Br_None */
1954 // /* but swap b,c order if SwapOperands==true */ 2053 // /* but swap b,c order if SwapOperands==true */
1955 // mov a, <default> 2054 // mov a, <default>
1956 // j<C1> label /* only if C1 != Br_None */ 2055 // j<C1> label /* only if C1 != Br_None */
1957 // j<C2> label /* only if C2 != Br_None */ 2056 // j<C2> label /* only if C2 != Br_None */
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
2069 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); 2168 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
2070 InstX8632Label *Label = InstX8632Label::create(Func, this); 2169 InstX8632Label *Label = InstX8632Label::create(Func, this);
2071 _cmp(Src0New, Src1); 2170 _cmp(Src0New, Src1);
2072 _mov(Dest, One); 2171 _mov(Dest, One);
2073 _br(getIcmp32Mapping(Inst->getCondition()), Label); 2172 _br(getIcmp32Mapping(Inst->getCondition()), Label);
2074 Context.insert(InstFakeUse::create(Func, Dest)); 2173 Context.insert(InstFakeUse::create(Func, Dest));
2075 _mov(Dest, Zero); 2174 _mov(Dest, Zero);
2076 Context.insert(Label); 2175 Context.insert(Label);
2077 } 2176 }
2078 2177
2178 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
2179 Operand *SourceVectOperand = Inst->getSrc(0);
2180 Operand *ElementToInsert = Inst->getSrc(1);
2181 ConstantInteger *ElementIndex =
2182 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
2183 // Only constant indices are allowed in PNaCl IR.
2184 assert(ElementIndex);
2185 unsigned Index = ElementIndex->getValue();
2186
2187 Type Ty = SourceVectOperand->getType();
2188 Type ElementTy = typeElementType(Ty);
2189 Type InVectorElementTy = getInVectorElementType(Ty);
2190
2191 if (ElementTy == IceType_i1) {
2192 // Expand the element to the appropriate size for it to be inserted
2193 // in the vector.
2194 Variable *Expanded =
2195 Func->makeVariable(InVectorElementTy, Context.getNode());
2196 InstCast *Cast =
2197 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
2198 lowerCast(Cast);
2199 ElementToInsert = Expanded;
2200 }
2201
2202 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2203 // Lower insertelement with 32-bit wide elements using shufps.
2204 // TODO(wala): SSE4.1 has pinsrd and insertps.
2205 Variable *Element = NULL;
2206 if (InVectorElementTy == IceType_f32) {
2207 // Element will be in an XMM register since it is floating point.
2208 Element = legalizeToVar(ElementToInsert);
2209 } else {
2210 // Copy an integer to an XMM register.
2211 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
2212 Element = makeReg(Ty);
2213 _movd(Element, T);
2214 }
2215
2216 // shufps treats the source and desination operands as vectors of
2217 // four doublewords. The destination's two high doublewords are
2218 // selected from the source operand and the two low doublewords are
2219 // selected from the (original value of) the destination operand.
2220 // An insertelement operation can be effected with a sequence of two
2221 // shufps operations with appropriate masks. In all cases below,
2222 // Element[0] is being inserted into SourceVectOperand. Indices are
2223 // ordered from left to right.
2224 //
2225 // insertelement into index 0 (result is stored in Element):
2226 // Element := Element[0, 0] SourceVectOperand[0, 1]
2227 // Element := Element[0, 3] SourceVectOperand[2, 3]
2228 //
2229 // insertelement into index 1 (result is stored in Element):
2230 // Element := Element[0, 0] SourceVectOperand[0, 0]
2231 // Element := Element[3, 0] SourceVectOperand[2, 3]
2232 //
2233 // insertelement into index 2 (result is stored in T):
2234 // T := SourceVectOperand
2235 // Element := Element[0, 0] T[0, 3]
2236 // T := T[0, 1] Element[0, 3]
2237 //
2238 // insertelement into index 3 (result is stored in T):
2239 // T := SourceVectOperand
2240 // Element := Element[0, 0] T[0, 2]
2241 // T := T[0, 1] Element[3, 0]
2242 const unsigned char Mask1[4] = {64, 0, 192, 128};
2243 const unsigned char Mask2[4] = {236, 227, 196, 52};
2244
2245 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
2246 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);
2247
2248 // ALIGNHACK: Force vector operands to registers in instructions that
2249 // require aligned memory operands until support for stack alignment
2250 // is implemented.
2251 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2252 if (Index < 2) {
2253 SourceVectOperand = ALIGN_HACK(SourceVectOperand);
2254 _shufps(Element, SourceVectOperand, Mask1Constant);
2255 _shufps(Element, SourceVectOperand, Mask2Constant);
2256 _movp(Inst->getDest(), Element);
2257 } else {
2258 Variable *T = makeReg(Ty);
2259 _movp(T, SourceVectOperand);
2260 _shufps(Element, T, Mask1Constant);
2261 _shufps(T, Element, Mask2Constant);
2262 _movp(Inst->getDest(), T);
2263 }
2264 #undef ALIGN_HACK
2265 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
2266 Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
2267 Variable *T = makeReg(Ty);
2268 _movp(T, SourceVectOperand);
2269 _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
2270 _movp(Inst->getDest(), T);
2271 } else {
2272 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2273 // Spill the value to a stack slot and perform the insertion in
2274 // memory.
2275 // TODO(wala): SSE4.1 has pinsrb.
2276 //
2277 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
2278 // support for legalizing to mem is implemented.
2279 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2280 Slot->setWeight(RegWeight::Zero);
2281 _movp(Slot, legalizeToVar(SourceVectOperand));
2282
2283 // Compute the location of the position to insert in memory.
2284 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2285 OperandX8632Mem *Loc =
2286 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2287 _store(legalizeToVar(ElementToInsert), Loc);
2288
2289 Variable *T = makeReg(Ty);
2290 _movp(T, Slot);
2291 _movp(Inst->getDest(), T);
2292 }
2293 }
2294
2079 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2295 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2080 switch (Instr->getIntrinsicInfo().ID) { 2296 switch (Instr->getIntrinsicInfo().ID) {
2081 case Intrinsics::AtomicCmpxchg: { 2297 case Intrinsics::AtomicCmpxchg: {
2082 if (!Intrinsics::VerifyMemoryOrder( 2298 if (!Intrinsics::VerifyMemoryOrder(
2083 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 2299 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
2084 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); 2300 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
2085 return; 2301 return;
2086 } 2302 }
2087 if (!Intrinsics::VerifyMemoryOrder( 2303 if (!Intrinsics::VerifyMemoryOrder(
2088 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 2304 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
(...skipping 911 matching lines...) Expand 10 before | Expand all | Expand 10 after
3000 // this value is initialized using register operations. 3216 // this value is initialized using register operations.
3001 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 3217 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
3002 Variable *MinusOne = makeReg(Ty); 3218 Variable *MinusOne = makeReg(Ty);
3003 // Insert a FakeDef so the live range of MinusOne is not overestimated. 3219 // Insert a FakeDef so the live range of MinusOne is not overestimated.
3004 Context.insert(InstFakeDef::create(Func, MinusOne)); 3220 Context.insert(InstFakeDef::create(Func, MinusOne));
3005 _pcmpeq(MinusOne, MinusOne); 3221 _pcmpeq(MinusOne, MinusOne);
3006 _psub(Dest, MinusOne); 3222 _psub(Dest, MinusOne);
3007 return Dest; 3223 return Dest;
3008 } 3224 }
3009 3225
3226 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
3227 Variable *Slot,
3228 uint32_t Offset) {
3229 // Ensure that Loc is a stack slot.
3230 assert(Slot->getWeight() == RegWeight::Zero &&
Jim Stichnoth 2014/07/18 17:21:01 Use 2 separate asserts for more precise diagnostic
wala 2014/07/18 19:36:13 Done.
3231 Slot->getRegNum() == Variable::NoRegister);
3232 // Compute the location of Loc in memory.
3233 // TODO(wala,stichnot): lea should not be required. The address of
3234 // the stack slot is known at compile time (although not until after
3235 // addProlog()).
3236 const Type PointerType = IceType_i32;
3237 Variable *Loc = makeReg(PointerType);
3238 _lea(Loc, Slot);
3239 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
3240 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
3241 }
3242
3010 // Helper for legalize() to emit the right code to lower an operand to a 3243 // Helper for legalize() to emit the right code to lower an operand to a
3011 // register of the appropriate type. 3244 // register of the appropriate type.
3012 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 3245 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
3013 Type Ty = Src->getType(); 3246 Type Ty = Src->getType();
3014 Variable *Reg = makeReg(Ty, RegNum); 3247 Variable *Reg = makeReg(Ty, RegNum);
3015 if (isVectorType(Ty)) { 3248 if (isVectorType(Ty)) {
3016 _movp(Reg, Src); 3249 _movp(Reg, Src);
3017 } else { 3250 } else {
3018 _mov(Reg, Src); 3251 _mov(Reg, Src);
3019 } 3252 }
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
3321 for (SizeT i = 0; i < Size; ++i) { 3554 for (SizeT i = 0; i < Size; ++i) {
3322 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3555 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3323 } 3556 }
3324 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3557 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3325 } 3558 }
3326 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3559 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3327 << "\n"; 3560 << "\n";
3328 } 3561 }
3329 3562
3330 } // end of namespace Ice 3563 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698