Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(560)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 401523003: Lower insertelement and extractelement. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/IceInstX8632.cpp ('K') | « src/IceTargetLoweringX8632.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
78 #undef X 78 #undef X
79 }; 79 };
80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); 80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81 81
82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { 82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond); 83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size); 84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping; 85 return TableIcmp32[Index].Mapping;
86 } 86 }
87 87
88 // Return the type which the elements of the vector have in the X86
89 // representation.
90 Type getLoweredElementType(Type Ty) {
91 assert(isVectorType(Ty));
92 switch (Ty) {
93 default:
Jim Stichnoth 2014/07/17 19:49:01 Can this be encoded in ICETYPEX8632_TABLE?
wala 2014/07/17 22:14:12 Done. I've changed the name of this function to g
94 return typeElementType(Ty);
95 case IceType_v4i1:
96 return IceType_i32;
97 case IceType_v8i1:
98 return IceType_i16;
99 case IceType_v16i1:
100 return IceType_i8;
101 }
102 }
103
88 // The maximum number of arguments to pass in XMM registers 104 // The maximum number of arguments to pass in XMM registers
89 const unsigned X86_MAX_XMM_ARGS = 4; 105 const unsigned X86_MAX_XMM_ARGS = 4;
90 // The number of bits in a byte 106 // The number of bits in a byte
91 const unsigned X86_CHAR_BIT = 8; 107 const unsigned X86_CHAR_BIT = 8;
92 108
93 // In some cases, there are x-macros tables for both high-level and 109 // In some cases, there are x-macros tables for both high-level and
94 // low-level instructions/operands that use the same enum key value. 110 // low-level instructions/operands that use the same enum key value.
95 // The tables are kept separate to maintain a proper separation 111 // The tables are kept separate to maintain a proper separation
96 // between abstraction layers. There is a risk that the tables 112 // between abstraction layers. There is a risk that the tables
97 // could get out of sync if enum values are reordered or if entries 113 // could get out of sync if enum values are reordered or if entries
(...skipping 1840 matching lines...) Expand 10 before | Expand all | Expand 10 after
1938 case IceType_v4i32: 1954 case IceType_v4i32:
1939 case IceType_v4f32: { 1955 case IceType_v4f32: {
1940 _movp(Dest, legalizeToVar(Src0)); 1956 _movp(Dest, legalizeToVar(Src0));
1941 } break; 1957 } break;
1942 } 1958 }
1943 break; 1959 break;
1944 } 1960 }
1945 } 1961 }
1946 } 1962 }
1947 1963
1964 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
1965 Operand *SourceVectOperand = Inst->getSrc(0);
1966 ConstantInteger *ElementIndex =
1967 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
1968 // Only constant indices are allowed in PNaCl IR.
1969 assert(ElementIndex);
1970
1971 unsigned Index = ElementIndex->getValue();
1972 Type Ty = SourceVectOperand->getType();
1973 Type ElementTy = typeElementType(Ty);
1974 Type LoweredElementTy = getLoweredElementType(Ty);
1975 Variable *ExtractedElement = makeReg(LoweredElementTy);
1976
1977 // TODO(wala): Determine the best lowering sequences for each type.
1978 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
1979 // Lower extractelement operations where the element is 32 bits
1980 // wide with pshufd.
1981 // TODO(wala): SSE4.1 has extractps and pextrd
1982 //
1983 // ALIGNHACK: Force vector operands to registers in instructions that
1984 // require aligned memory operands until support for stack alignment
1985 // is implemented.
1986 #define ALIGN_HACK(Vect) legalizeToVar(Vect)
Jim Stichnoth 2014/07/17 19:49:01 To be absolutely macro-safe, I think this should b
wala 2014/07/17 22:14:12 Done.
1987 Operand *T = NULL;
1988 if (Index) {
1989 // The shuffle only needs to occur if the element to be extracted
1990 // is not at the lowest index.
1991 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
1992 T = makeReg(Ty);
1993 _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
1994 } else {
1995 // TODO(wala): If SourceVectOperand is in memory, express it as
1996 // mem32 so that the call to legalizeToVar() is made unnecessary.
1997 // _movd and _movss only take mem32 memory operands.
1998 T = legalizeToVar(SourceVectOperand);
1999 }
2000
2001 if (LoweredElementTy == IceType_i32) {
2002 _movd(ExtractedElement, T);
2003 } else { // LoweredElementTy == IceType_f32
2004 _movss(ExtractedElement, T);
2005 }
2006 #undef ALIGN_HACK
2007 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
2008 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2009 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
2010 } else {
2011 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2012 // Spill the value to a stack slot and do the extraction in memory.
2013 // TODO(wala): SSE4.1 has pextrb.
2014 //
2015 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
2016 // support for legalizing to mem is implemented.
2017 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2018 Slot->setWeight(RegWeight::Zero);
2019 _movp(Slot, legalizeToVar(SourceVectOperand));
2020
2021 // Compute the location of the element in memory.
2022 unsigned Offset = Index * typeWidthInBytes(LoweredElementTy);
2023 OperandX8632Mem *Loc =
2024 getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset);
2025 _mov(ExtractedElement, Loc);
2026 }
2027
2028 if (ElementTy == IceType_i1) {
2029 // Truncate extracted integers to i1s if necessary.
2030 Variable *T = makeReg(IceType_i1);
2031 InstCast *Cast =
2032 InstCast::create(Func, InstCast::Trunc, T, ExtractedElement);
2033 lowerCast(Cast);
2034 ExtractedElement = T;
2035 }
2036
2037 // Copy the element to the destination.
2038 Variable *Dest = Inst->getDest();
2039 _mov(Dest, ExtractedElement);
2040 }
2041
1948 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { 2042 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
1949 Operand *Src0 = Inst->getSrc(0); 2043 Operand *Src0 = Inst->getSrc(0);
1950 Operand *Src1 = Inst->getSrc(1); 2044 Operand *Src1 = Inst->getSrc(1);
1951 Variable *Dest = Inst->getDest(); 2045 Variable *Dest = Inst->getDest();
1952 // Lowering a = fcmp cond, b, c 2046 // Lowering a = fcmp cond, b, c
1953 // ucomiss b, c /* only if C1 != Br_None */ 2047 // ucomiss b, c /* only if C1 != Br_None */
1954 // /* but swap b,c order if SwapOperands==true */ 2048 // /* but swap b,c order if SwapOperands==true */
1955 // mov a, <default> 2049 // mov a, <default>
1956 // j<C1> label /* only if C1 != Br_None */ 2050 // j<C1> label /* only if C1 != Br_None */
1957 // j<C2> label /* only if C2 != Br_None */ 2051 // j<C2> label /* only if C2 != Br_None */
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
2069 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); 2163 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
2070 InstX8632Label *Label = InstX8632Label::create(Func, this); 2164 InstX8632Label *Label = InstX8632Label::create(Func, this);
2071 _cmp(Src0New, Src1); 2165 _cmp(Src0New, Src1);
2072 _mov(Dest, One); 2166 _mov(Dest, One);
2073 _br(getIcmp32Mapping(Inst->getCondition()), Label); 2167 _br(getIcmp32Mapping(Inst->getCondition()), Label);
2074 Context.insert(InstFakeUse::create(Func, Dest)); 2168 Context.insert(InstFakeUse::create(Func, Dest));
2075 _mov(Dest, Zero); 2169 _mov(Dest, Zero);
2076 Context.insert(Label); 2170 Context.insert(Label);
2077 } 2171 }
2078 2172
2173 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
2174 Operand *SourceVectOperand = Inst->getSrc(0);
2175 Operand *ElementToInsert = Inst->getSrc(1);
2176 ConstantInteger *ElementIndex =
2177 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
2178 // Only constant indices are allowed in PNaCl IR.
2179 assert(ElementIndex);
2180 unsigned Index = ElementIndex->getValue();
2181
2182 Type Ty = SourceVectOperand->getType();
2183 Type ElementTy = typeElementType(Ty);
2184 Type LoweredElementTy = getLoweredElementType(Ty);
2185
2186 if (ElementTy == IceType_i1) {
2187 // Expand the element to the appropriate size for it to be inserted
2188 // in the vector.
2189 Variable *Expanded =
2190 Func->makeVariable(LoweredElementTy, Context.getNode());
2191 InstCast *Cast =
2192 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
2193 lowerCast(Cast);
2194 ElementToInsert = Expanded;
2195 }
2196
2197 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2198 // Lower insertelement with 32-bit wide elements using shufps.
2199 // TODO(wala): SSE4.1 has pinsrd and insertps.
2200 //
2201 // ALIGNHACK: Force vector operands to registers in instructions that
2202 // require aligned memory operands until support for stack alignment
2203 // is implemented.
2204 #define ALIGN_HACK(Vect) legalizeToVar(Vect)
jvoung (off chromium) 2014/07/17 19:36:38 Could move ALIGN_HACK down (near the Index < 2), i
wala 2014/07/17 22:14:12 Done.
2205
2206 Variable *Element = NULL;
2207 if (LoweredElementTy == IceType_f32) {
2208 // Element will be in an XMM register since it is floating point.
2209 Element = legalizeToVar(ElementToInsert);
2210 } else {
2211 // Copy an integer to an XMM register.
2212 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
2213 Element = makeReg(Ty);
2214 _movd(Element, T);
2215 }
2216
2217 // shufps treats the source and destination operands as vectors of
2218 // four doublewords. The destination's two high doublewords are
2219 // selected from the source operand and the two low doublewords are
2220 // selected from the (original value of the) destination operand.
2221 // An insertelement operation can be effected with a sequence of two
2222 // shufps operations with appropriate masks. In all cases below,
2223 // Element[0] is being inserted into SourceVectOperand. Indices are
2224 // ordered from left to right.
2225 //
2226 // insertelement into index 0 (result is stored in Element):
2227 // Element := Element[0, 0] SourceVectOperand[0, 1]
2228 // Element := Element[0, 3] SourceVectOperand[2, 3]
2229 //
2230 // insertelement into index 1 (result is stored in Element):
2231 // Element := Element[0, 0] SourceVectOperand[0, 0]
2232 // Element := Element[3, 0] SourceVectOperand[2, 3]
2233 //
2234 // insertelement into index 2 (result is stored in T):
2235 // T := SourceVectOperand
2236 // Element := Element[0, 0] T[0, 3]
2237 // T := T[0, 1] Element[0, 3]
2238 //
2239 // insertelement into index 3 (result is stored in T):
2240 // T := SourceVectOperand
2241 // Element := Element[0, 0] T[0, 2]
2242 // T := T[0, 1] Element[3, 0]
2243 const unsigned char Mask1[4] = {64, 0, 192, 128};
2244 const unsigned char Mask2[4] = {236, 227, 196, 52};
2245
2246 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
2247 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);
2248
2249 if (Index < 2) {
2250 SourceVectOperand = ALIGN_HACK(SourceVectOperand);
2251 _shufps(Element, SourceVectOperand, Mask1Constant);
2252 _shufps(Element, SourceVectOperand, Mask2Constant);
2253 _movp(Inst->getDest(), Element);
2254 } else {
2255 Variable *T = makeReg(Ty);
2256 _movp(T, SourceVectOperand);
2257 _shufps(Element, T, Mask1Constant);
2258 _shufps(T, Element, Mask2Constant);
2259 _movp(Inst->getDest(), T);
2260 }
2261 #undef ALIGN_HACK
2262 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
2263 Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
2264 Variable *T = makeReg(Ty);
2265 _movp(T, SourceVectOperand);
2266 _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
2267 _movp(Inst->getDest(), T);
2268 } else {
2269 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2270 // Spill the value to a stack slot and perform the insertion in
2271 // memory.
2272 // TODO(wala): SSE4.1 has pinsrb.
2273 //
2274 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
2275 // support for legalizing to mem is implemented.
2276 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2277 Slot->setWeight(RegWeight::Zero);
2278 _movp(Slot, legalizeToVar(SourceVectOperand));
2279
2280 // Compute the memory location at which the element is to be inserted.
2281 unsigned Offset = Index * typeWidthInBytes(LoweredElementTy);
2282 OperandX8632Mem *Loc =
2283 getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset);
2284 _store(legalizeToVar(ElementToInsert), Loc);
2285
2286 Variable *T = makeReg(Ty);
2287 _movp(T, Slot);
2288 _movp(Inst->getDest(), T);
2289 }
2290 }
2291
2079 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2292 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2080 switch (Instr->getIntrinsicInfo().ID) { 2293 switch (Instr->getIntrinsicInfo().ID) {
2081 case Intrinsics::AtomicCmpxchg: { 2294 case Intrinsics::AtomicCmpxchg: {
2082 if (!Intrinsics::VerifyMemoryOrder( 2295 if (!Intrinsics::VerifyMemoryOrder(
2083 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 2296 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
2084 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); 2297 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
2085 return; 2298 return;
2086 } 2299 }
2087 if (!Intrinsics::VerifyMemoryOrder( 2300 if (!Intrinsics::VerifyMemoryOrder(
2088 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 2301 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
(...skipping 911 matching lines...) Expand 10 before | Expand all | Expand 10 after
3000 // this value is initialized using register operations. 3213 // this value is initialized using register operations.
3001 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 3214 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
3002 Variable *MinusOne = makeReg(Ty); 3215 Variable *MinusOne = makeReg(Ty);
3003 // Insert a FakeDef so the live range of MinusOne is not overestimated. 3216 // Insert a FakeDef so the live range of MinusOne is not overestimated.
3004 Context.insert(InstFakeDef::create(Func, MinusOne)); 3217 Context.insert(InstFakeDef::create(Func, MinusOne));
3005 _pcmpeq(MinusOne, MinusOne); 3218 _pcmpeq(MinusOne, MinusOne);
3006 _psub(Dest, MinusOne); 3219 _psub(Dest, MinusOne);
3007 return Dest; 3220 return Dest;
3008 } 3221 }
3009 3222
3223 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
3224 Variable *Slot,
3225 uint32_t Offset) {
3226 // Ensure that Slot is a stack slot.
3227 assert(Slot->getWeight() == RegWeight::Zero);
Jim Stichnoth 2014/07/17 19:49:01 May also want to assert NoRegister.
wala 2014/07/17 22:14:12 Done.
3228 // Compute the location of Slot in memory.
3229 // TODO(wala,stichnot): lea should not be required. The address of
3230 // the stack slot is known at compile time (although not until after
3231 // addProlog()).
3232 const Type PointerType = IceType_i32;
3233 Variable *Loc = makeReg(PointerType);
3234 _lea(Loc, Slot);
3235 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
3236 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
3237 }
3238
3010 // Helper for legalize() to emit the right code to lower an operand to a 3239 // Helper for legalize() to emit the right code to lower an operand to a
3011 // register of the appropriate type. 3240 // register of the appropriate type.
3012 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 3241 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
3013 Type Ty = Src->getType(); 3242 Type Ty = Src->getType();
3014 Variable *Reg = makeReg(Ty, RegNum); 3243 Variable *Reg = makeReg(Ty, RegNum);
3015 if (isVectorType(Ty)) { 3244 if (isVectorType(Ty)) {
3016 _movp(Reg, Src); 3245 _movp(Reg, Src);
3017 } else { 3246 } else {
3018 _mov(Reg, Src); 3247 _mov(Reg, Src);
3019 } 3248 }
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
3321 for (SizeT i = 0; i < Size; ++i) { 3550 for (SizeT i = 0; i < Size; ++i) {
3322 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3551 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3323 } 3552 }
3324 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3553 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3325 } 3554 }
3326 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3555 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3327 << "\n"; 3556 << "\n";
3328 } 3557 }
3329 3558
3330 } // end of namespace Ice 3559 } // end of namespace Ice
OLDNEW
« src/IceInstX8632.cpp ('K') | « src/IceTargetLoweringX8632.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698