OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
78 #undef X | 78 #undef X |
79 }; | 79 }; |
80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); | 80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); |
81 | 81 |
82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
83 size_t Index = static_cast<size_t>(Cond); | 83 size_t Index = static_cast<size_t>(Cond); |
84 assert(Index < TableIcmp32Size); | 84 assert(Index < TableIcmp32Size); |
85 return TableIcmp32[Index].Mapping; | 85 return TableIcmp32[Index].Mapping; |
86 } | 86 } |
87 | 87 |
88 // Return the type which the elements of the vector have in the X86 | |
89 // representation. | |
90 Type getLoweredElementType(Type Ty) { | |
91 assert(isVectorType(Ty)); | |
92 switch (Ty) { | |
93 default: | |
Jim Stichnoth
2014/07/17 19:49:01
Can this be encoded in ICETYPEX8632_TABLE?
wala
2014/07/17 22:14:12
Done.
I've changed the name of this function to g
| |
94 return typeElementType(Ty); | |
95 case IceType_v4i1: | |
96 return IceType_i32; | |
97 case IceType_v8i1: | |
98 return IceType_i16; | |
99 case IceType_v16i1: | |
100 return IceType_i8; | |
101 } | |
102 } | |
103 | |
88 // The maximum number of arguments to pass in XMM registers | 104 // The maximum number of arguments to pass in XMM registers |
89 const unsigned X86_MAX_XMM_ARGS = 4; | 105 const unsigned X86_MAX_XMM_ARGS = 4; |
90 // The number of bits in a byte | 106 // The number of bits in a byte |
91 const unsigned X86_CHAR_BIT = 8; | 107 const unsigned X86_CHAR_BIT = 8; |
92 | 108 |
93 // In some cases, there are x-macros tables for both high-level and | 109 // In some cases, there are x-macros tables for both high-level and |
94 // low-level instructions/operands that use the same enum key value. | 110 // low-level instructions/operands that use the same enum key value. |
95 // The tables are kept separate to maintain a proper separation | 111 // The tables are kept separate to maintain a proper separation |
96 // between abstraction layers. There is a risk that the tables | 112 // between abstraction layers. There is a risk that the tables |
97 // could get out of sync if enum values are reordered or if entries | 113 // could get out of sync if enum values are reordered or if entries |
(...skipping 1840 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1938 case IceType_v4i32: | 1954 case IceType_v4i32: |
1939 case IceType_v4f32: { | 1955 case IceType_v4f32: { |
1940 _movp(Dest, legalizeToVar(Src0)); | 1956 _movp(Dest, legalizeToVar(Src0)); |
1941 } break; | 1957 } break; |
1942 } | 1958 } |
1943 break; | 1959 break; |
1944 } | 1960 } |
1945 } | 1961 } |
1946 } | 1962 } |
1947 | 1963 |
1964 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { | |
1965 Operand *SourceVectOperand = Inst->getSrc(0); | |
1966 ConstantInteger *ElementIndex = | |
1967 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1)); | |
1968 // Only constant indices are allowed in PNaCl IR. | |
1969 assert(ElementIndex); | |
1970 | |
1971 unsigned Index = ElementIndex->getValue(); | |
1972 Type Ty = SourceVectOperand->getType(); | |
1973 Type ElementTy = typeElementType(Ty); | |
1974 Type LoweredElementTy = getLoweredElementType(Ty); | |
1975 Variable *ExtractedElement = makeReg(LoweredElementTy); | |
1976 | |
1977 // TODO(wala): Determine the best lowering sequences for each type. | |
1978 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | |
1979 // Lower extractelement operations where the element is 32 bits | |
1980 // wide with pshufd. | |
1981 // TODO(wala): SSE4.1 has extractps and pextrd | |
1982 // | |
1983 // ALIGNHACK: Force vector operands to registers in instructions that | |
1984 // require aligned memory operands until support for stack alignment | |
1985 // is implemented. | |
1986 #define ALIGN_HACK(Vect) legalizeToVar(Vect) | |
Jim Stichnoth
2014/07/17 19:49:01
To be absolutely macro-safe, I think this should b
wala
2014/07/17 22:14:12
Done.
| |
1987 Operand *T = NULL; | |
1988 if (Index) { | |
1989 // The shuffle only needs to occur if the element to be extracted | |
1990 // is not at the lowest index. | |
1991 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | |
1992 T = makeReg(Ty); | |
1993 _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask); | |
1994 } else { | |
1995 // TODO(wala): If SourceVectOperand is in memory, express it as | |
1996 // mem32 so that the call to legalizeToVar() is made unnecessary. | |
1997 // _movd and _movss only take mem32 memory operands. | |
1998 T = legalizeToVar(SourceVectOperand); | |
1999 } | |
2000 | |
2001 if (LoweredElementTy == IceType_i32) { | |
2002 _movd(ExtractedElement, T); | |
2003 } else { // LoweredElementTy == IceType_f32 | |
2004 _movss(ExtractedElement, T); | |
2005 } | |
2006 #undef ALIGN_HACK | |
2007 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { | |
2008 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | |
2009 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask); | |
2010 } else { | |
2011 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | |
2012 // Spill the value to a stack slot and do the extraction in memory. | |
2013 // TODO(wala): SSE4.1 has pextrb. | |
2014 // | |
2015 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when | |
2016 // support for legalizing to mem is implemented. | |
2017 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | |
2018 Slot->setWeight(RegWeight::Zero); | |
2019 _movp(Slot, legalizeToVar(SourceVectOperand)); | |
2020 | |
2021 // Compute the location of the element in memory. | |
2022 unsigned Offset = Index * typeWidthInBytes(LoweredElementTy); | |
2023 OperandX8632Mem *Loc = | |
2024 getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset); | |
2025 _mov(ExtractedElement, Loc); | |
2026 } | |
2027 | |
2028 if (ElementTy == IceType_i1) { | |
2029 // Truncate extracted integers to i1s if necessary. | |
2030 Variable *T = makeReg(IceType_i1); | |
2031 InstCast *Cast = | |
2032 InstCast::create(Func, InstCast::Trunc, T, ExtractedElement); | |
2033 lowerCast(Cast); | |
2034 ExtractedElement = T; | |
2035 } | |
2036 | |
2037 // Copy the element to the destination. | |
2038 Variable *Dest = Inst->getDest(); | |
2039 _mov(Dest, ExtractedElement); | |
2040 } | |
2041 | |
1948 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 2042 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
1949 Operand *Src0 = Inst->getSrc(0); | 2043 Operand *Src0 = Inst->getSrc(0); |
1950 Operand *Src1 = Inst->getSrc(1); | 2044 Operand *Src1 = Inst->getSrc(1); |
1951 Variable *Dest = Inst->getDest(); | 2045 Variable *Dest = Inst->getDest(); |
1952 // Lowering a = fcmp cond, b, c | 2046 // Lowering a = fcmp cond, b, c |
1953 // ucomiss b, c /* only if C1 != Br_None */ | 2047 // ucomiss b, c /* only if C1 != Br_None */ |
1954 // /* but swap b,c order if SwapOperands==true */ | 2048 // /* but swap b,c order if SwapOperands==true */ |
1955 // mov a, <default> | 2049 // mov a, <default> |
1956 // j<C1> label /* only if C1 != Br_None */ | 2050 // j<C1> label /* only if C1 != Br_None */ |
1957 // j<C2> label /* only if C2 != Br_None */ | 2051 // j<C2> label /* only if C2 != Br_None */ |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2069 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | 2163 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
2070 InstX8632Label *Label = InstX8632Label::create(Func, this); | 2164 InstX8632Label *Label = InstX8632Label::create(Func, this); |
2071 _cmp(Src0New, Src1); | 2165 _cmp(Src0New, Src1); |
2072 _mov(Dest, One); | 2166 _mov(Dest, One); |
2073 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 2167 _br(getIcmp32Mapping(Inst->getCondition()), Label); |
2074 Context.insert(InstFakeUse::create(Func, Dest)); | 2168 Context.insert(InstFakeUse::create(Func, Dest)); |
2075 _mov(Dest, Zero); | 2169 _mov(Dest, Zero); |
2076 Context.insert(Label); | 2170 Context.insert(Label); |
2077 } | 2171 } |
2078 | 2172 |
2173 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | |
2174 Operand *SourceVectOperand = Inst->getSrc(0); | |
2175 Operand *ElementToInsert = Inst->getSrc(1); | |
2176 ConstantInteger *ElementIndex = | |
2177 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); | |
2178 // Only constant indices are allowed in PNaCl IR. | |
2179 assert(ElementIndex); | |
2180 unsigned Index = ElementIndex->getValue(); | |
2181 | |
2182 Type Ty = SourceVectOperand->getType(); | |
2183 Type ElementTy = typeElementType(Ty); | |
2184 Type LoweredElementTy = getLoweredElementType(Ty); | |
2185 | |
2186 if (ElementTy == IceType_i1) { | |
2187 // Expand the element to the appropriate size for it to be inserted | |
2188 // in the vector. | |
2189 Variable *Expanded = | |
2190 Func->makeVariable(LoweredElementTy, Context.getNode()); | |
2191 InstCast *Cast = | |
2192 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert); | |
2193 lowerCast(Cast); | |
2194 ElementToInsert = Expanded; | |
2195 } | |
2196 | |
2197 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | |
2198 // Lower insertelement with 32-bit wide elements using shufps. | |
2199 // TODO(wala): SSE4.1 has pinsrd and insertps. | |
2200 // | |
2201 // ALIGNHACK: Force vector operands to registers in instructions that | |
2202 // require aligned memory operands until support for stack alignment | |
2203 // is implemented. | |
2204 #define ALIGN_HACK(Vect) legalizeToVar(Vect) | |
jvoung (off chromium)
2014/07/17 19:36:38
Could move ALIGN_HACK down (near the Index < 2), i
wala
2014/07/17 22:14:12
Done.
| |
2205 | |
2206 Variable *Element = NULL; | |
2207 if (LoweredElementTy == IceType_f32) { | |
2208 // Element will be in an XMM register since it is floating point. | |
2209 Element = legalizeToVar(ElementToInsert); | |
2210 } else { | |
2211 // Copy an integer to an XMM register. | |
2212 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem); | |
2213 Element = makeReg(Ty); | |
2214 _movd(Element, T); | |
2215 } | |
2216 | |
2217 // shufps treats the source and destination operands as vectors of | |
2218 // four doublewords. The destination's two high doublewords are | |
2219 // selected from the source operand and the two low doublewords are | |
2220 // selected from the (original value of) the destination operand. | |
2221 // An insertelement operation can be effected with a sequence of two | |
2222 // shufps operations with appropriate masks. In all cases below, | |
2223 // Element[0] is being inserted into SourceVectOperand. Indices are | |
2224 // ordered from left to right. | |
2225 // | |
2226 // insertelement into index 0 (result is stored in Element): | |
2227 // Element := Element[0, 0] SourceVectOperand[0, 1] | |
2228 // Element := Element[0, 3] SourceVectOperand[2, 3] | |
2229 // | |
2230 // insertelement into index 1 (result is stored in Element): | |
2231 // Element := Element[0, 0] SourceVectOperand[0, 0] | |
2232 // Element := Element[3, 0] SourceVectOperand[2, 3] | |
2233 // | |
2234 // insertelement into index 2 (result is stored in T): | |
2235 // T := SourceVectOperand | |
2236 // Element := Element[0, 0] T[0, 3] | |
2237 // T := T[0, 1] Element[0, 3] | |
2238 // | |
2239 // insertelement into index 3 (result is stored in T): | |
2240 // T := SourceVectOperand | |
2241 // Element := Element[0, 0] T[0, 2] | |
2242 // T := T[0, 1] Element[3, 0] | |
2243 const unsigned char Mask1[4] = {64, 0, 192, 128}; | |
2244 const unsigned char Mask2[4] = {236, 227, 196, 52}; | |
2245 | |
2246 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]); | |
2247 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]); | |
2248 | |
2249 if (Index < 2) { | |
2250 SourceVectOperand = ALIGN_HACK(SourceVectOperand); | |
2251 _shufps(Element, SourceVectOperand, Mask1Constant); | |
2252 _shufps(Element, SourceVectOperand, Mask2Constant); | |
2253 _movp(Inst->getDest(), Element); | |
2254 } else { | |
2255 Variable *T = makeReg(Ty); | |
2256 _movp(T, SourceVectOperand); | |
2257 _shufps(Element, T, Mask1Constant); | |
2258 _shufps(T, Element, Mask2Constant); | |
2259 _movp(Inst->getDest(), T); | |
2260 } | |
2261 #undef ALIGN_HACK | |
2262 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { | |
2263 Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg); | |
2264 Variable *T = makeReg(Ty); | |
2265 _movp(T, SourceVectOperand); | |
2266 _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index)); | |
2267 _movp(Inst->getDest(), T); | |
2268 } else { | |
2269 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | |
2270 // Spill the value to a stack slot and perform the insertion in | |
2271 // memory. | |
2272 // TODO(wala): SSE4.1 has pinsrb. | |
2273 // | |
2274 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when | |
2275 // support for legalizing to mem is implemented. | |
2276 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | |
2277 Slot->setWeight(RegWeight::Zero); | |
2278 _movp(Slot, legalizeToVar(SourceVectOperand)); | |
2279 | |
2280 // Compute the location of the position to insert in memory. | |
2281 unsigned Offset = Index * typeWidthInBytes(LoweredElementTy); | |
2282 OperandX8632Mem *Loc = | |
2283 getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset); | |
2284 _store(legalizeToVar(ElementToInsert), Loc); | |
2285 | |
2286 Variable *T = makeReg(Ty); | |
2287 _movp(T, Slot); | |
2288 _movp(Inst->getDest(), T); | |
2289 } | |
2290 } | |
2291 | |
2079 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2292 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
2080 switch (Instr->getIntrinsicInfo().ID) { | 2293 switch (Instr->getIntrinsicInfo().ID) { |
2081 case Intrinsics::AtomicCmpxchg: { | 2294 case Intrinsics::AtomicCmpxchg: { |
2082 if (!Intrinsics::VerifyMemoryOrder( | 2295 if (!Intrinsics::VerifyMemoryOrder( |
2083 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { | 2296 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { |
2084 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | 2297 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); |
2085 return; | 2298 return; |
2086 } | 2299 } |
2087 if (!Intrinsics::VerifyMemoryOrder( | 2300 if (!Intrinsics::VerifyMemoryOrder( |
2088 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 2301 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
(...skipping 911 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3000 // this value is initialized using register operations. | 3213 // this value is initialized using register operations. |
3001 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 3214 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
3002 Variable *MinusOne = makeReg(Ty); | 3215 Variable *MinusOne = makeReg(Ty); |
3003 // Insert a FakeDef so the live range of MinusOne is not overestimated. | 3216 // Insert a FakeDef so the live range of MinusOne is not overestimated. |
3004 Context.insert(InstFakeDef::create(Func, MinusOne)); | 3217 Context.insert(InstFakeDef::create(Func, MinusOne)); |
3005 _pcmpeq(MinusOne, MinusOne); | 3218 _pcmpeq(MinusOne, MinusOne); |
3006 _psub(Dest, MinusOne); | 3219 _psub(Dest, MinusOne); |
3007 return Dest; | 3220 return Dest; |
3008 } | 3221 } |
3009 | 3222 |
3223 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | |
3224 Variable *Slot, | |
3225 uint32_t Offset) { | |
3226 // Ensure that Slot is a stack slot. | |
3227 assert(Slot->getWeight() == RegWeight::Zero); | |
Jim Stichnoth
2014/07/17 19:49:01
May also want to assert NoRegister.
wala
2014/07/17 22:14:12
Done.
| |
3228 // Compute the location of Slot in memory. | |
3229 // TODO(wala,stichnot): lea should not be required. The address of | |
3230 // the stack slot is known at compile time (although not until after | |
3231 // addProlog()). | |
3232 const Type PointerType = IceType_i32; | |
3233 Variable *Loc = makeReg(PointerType); | |
3234 _lea(Loc, Slot); | |
3235 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset); | |
3236 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | |
3237 } | |
3238 | |
3010 // Helper for legalize() to emit the right code to lower an operand to a | 3239 // Helper for legalize() to emit the right code to lower an operand to a |
3011 // register of the appropriate type. | 3240 // register of the appropriate type. |
3012 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { | 3241 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { |
3013 Type Ty = Src->getType(); | 3242 Type Ty = Src->getType(); |
3014 Variable *Reg = makeReg(Ty, RegNum); | 3243 Variable *Reg = makeReg(Ty, RegNum); |
3015 if (isVectorType(Ty)) { | 3244 if (isVectorType(Ty)) { |
3016 _movp(Reg, Src); | 3245 _movp(Reg, Src); |
3017 } else { | 3246 } else { |
3018 _mov(Reg, Src); | 3247 _mov(Reg, Src); |
3019 } | 3248 } |
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3321 for (SizeT i = 0; i < Size; ++i) { | 3550 for (SizeT i = 0; i < Size; ++i) { |
3322 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3551 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3323 } | 3552 } |
3324 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3553 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3325 } | 3554 } |
3326 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3555 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3327 << "\n"; | 3556 << "\n"; |
3328 } | 3557 } |
3329 | 3558 |
3330 } // end of namespace Ice | 3559 } // end of namespace Ice |
OLD | NEW |