Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 78 #undef X | 78 #undef X |
| 79 }; | 79 }; |
| 80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); | 80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); |
| 81 | 81 |
| 82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 82 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| 83 size_t Index = static_cast<size_t>(Cond); | 83 size_t Index = static_cast<size_t>(Cond); |
| 84 assert(Index < TableIcmp32Size); | 84 assert(Index < TableIcmp32Size); |
| 85 return TableIcmp32[Index].Mapping; | 85 return TableIcmp32[Index].Mapping; |
| 86 } | 86 } |
| 87 | 87 |
| 88 // Return the type which the elements of the vector have in the X86 | |
| 89 // representation. | |
| 90 Type getLoweredElementType(Type Ty) { | |
| 91 assert(isVectorType(Ty)); | |
| 92 switch (Ty) { | |
| 93 default: | |
|
Jim Stichnoth
2014/07/17 19:49:01
Can this be encoded in ICETYPEX8632_TABLE?
wala
2014/07/17 22:14:12
Done.
I've changed the name of this function to g
| |
| 94 return typeElementType(Ty); | |
| 95 case IceType_v4i1: | |
| 96 return IceType_i32; | |
| 97 case IceType_v8i1: | |
| 98 return IceType_i16; | |
| 99 case IceType_v16i1: | |
| 100 return IceType_i8; | |
| 101 } | |
| 102 } | |
| 103 | |
| 88 // The maximum number of arguments to pass in XMM registers | 104 // The maximum number of arguments to pass in XMM registers |
| 89 const unsigned X86_MAX_XMM_ARGS = 4; | 105 const unsigned X86_MAX_XMM_ARGS = 4; |
| 90 // The number of bits in a byte | 106 // The number of bits in a byte |
| 91 const unsigned X86_CHAR_BIT = 8; | 107 const unsigned X86_CHAR_BIT = 8; |
| 92 | 108 |
| 93 // In some cases, there are x-macros tables for both high-level and | 109 // In some cases, there are x-macros tables for both high-level and |
| 94 // low-level instructions/operands that use the same enum key value. | 110 // low-level instructions/operands that use the same enum key value. |
| 95 // The tables are kept separate to maintain a proper separation | 111 // The tables are kept separate to maintain a proper separation |
| 96 // between abstraction layers. There is a risk that the tables | 112 // between abstraction layers. There is a risk that the tables |
| 97 // could get out of sync if enum values are reordered or if entries | 113 // could get out of sync if enum values are reordered or if entries |
| (...skipping 1840 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1938 case IceType_v4i32: | 1954 case IceType_v4i32: |
| 1939 case IceType_v4f32: { | 1955 case IceType_v4f32: { |
| 1940 _movp(Dest, legalizeToVar(Src0)); | 1956 _movp(Dest, legalizeToVar(Src0)); |
| 1941 } break; | 1957 } break; |
| 1942 } | 1958 } |
| 1943 break; | 1959 break; |
| 1944 } | 1960 } |
| 1945 } | 1961 } |
| 1946 } | 1962 } |
| 1947 | 1963 |
| 1964 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { | |
| 1965 Operand *SourceVectOperand = Inst->getSrc(0); | |
| 1966 ConstantInteger *ElementIndex = | |
| 1967 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1)); | |
| 1968 // Only constant indices are allowed in PNaCl IR. | |
| 1969 assert(ElementIndex); | |
| 1970 | |
| 1971 unsigned Index = ElementIndex->getValue(); | |
| 1972 Type Ty = SourceVectOperand->getType(); | |
| 1973 Type ElementTy = typeElementType(Ty); | |
| 1974 Type LoweredElementTy = getLoweredElementType(Ty); | |
| 1975 Variable *ExtractedElement = makeReg(LoweredElementTy); | |
| 1976 | |
| 1977 // TODO(wala): Determine the best lowering sequences for each type. | |
| 1978 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | |
| 1979 // Lower extractelement operations where the element is 32 bits | |
| 1980 // wide with pshufd. | |
| 1981 // TODO(wala): SSE4.1 has extractps and pextrd | |
| 1982 // | |
| 1983 // ALIGNHACK: Force vector operands to registers in instructions that | |
| 1984 // require aligned memory operands until support for stack alignment | |
| 1985 // is implemented. | |
| 1986 #define ALIGN_HACK(Vect) legalizeToVar(Vect) | |
|
Jim Stichnoth
2014/07/17 19:49:01
To be absolutely macro-safe, I think this should b
wala
2014/07/17 22:14:12
Done.
| |
| 1987 Operand *T = NULL; | |
| 1988 if (Index) { | |
| 1989 // The shuffle only needs to occur if the element to be extracted | |
| 1990 // is not at the lowest index. | |
| 1991 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | |
| 1992 T = makeReg(Ty); | |
| 1993 _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask); | |
| 1994 } else { | |
| 1995 // TODO(wala): If SourceVectOperand is in memory, express it as | |
| 1996 // mem32 so that the call to legalizeToVar() is made unnecessary. | |
| 1997 // _movd and _movss only take mem32 memory operands. | |
| 1998 T = legalizeToVar(SourceVectOperand); | |
| 1999 } | |
| 2000 | |
| 2001 if (LoweredElementTy == IceType_i32) { | |
| 2002 _movd(ExtractedElement, T); | |
| 2003 } else { // LoweredElementTy == IceType_f32 | |
| 2004 _movss(ExtractedElement, T); | |
| 2005 } | |
| 2006 #undef ALIGN_HACK | |
| 2007 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { | |
| 2008 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | |
| 2009 _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask); | |
| 2010 } else { | |
| 2011 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | |
| 2012 // Spill the value to a stack slot and do the extraction in memory. | |
| 2013 // TODO(wala): SSE4.1 has pextrb. | |
| 2014 // | |
| 2015 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when | |
| 2016 // support for legalizing to mem is implemented. | |
| 2017 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | |
| 2018 Slot->setWeight(RegWeight::Zero); | |
| 2019 _movp(Slot, legalizeToVar(SourceVectOperand)); | |
| 2020 | |
| 2021 // Compute the location of the element in memory. | |
| 2022 unsigned Offset = Index * typeWidthInBytes(LoweredElementTy); | |
| 2023 OperandX8632Mem *Loc = | |
| 2024 getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset); | |
| 2025 _mov(ExtractedElement, Loc); | |
| 2026 } | |
| 2027 | |
| 2028 if (ElementTy == IceType_i1) { | |
| 2029 // Truncate extracted integers to i1s if necessary. | |
| 2030 Variable *T = makeReg(IceType_i1); | |
| 2031 InstCast *Cast = | |
| 2032 InstCast::create(Func, InstCast::Trunc, T, ExtractedElement); | |
| 2033 lowerCast(Cast); | |
| 2034 ExtractedElement = T; | |
| 2035 } | |
| 2036 | |
| 2037 // Copy the element to the destination. | |
| 2038 Variable *Dest = Inst->getDest(); | |
| 2039 _mov(Dest, ExtractedElement); | |
| 2040 } | |
| 2041 | |
| 1948 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 2042 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
| 1949 Operand *Src0 = Inst->getSrc(0); | 2043 Operand *Src0 = Inst->getSrc(0); |
| 1950 Operand *Src1 = Inst->getSrc(1); | 2044 Operand *Src1 = Inst->getSrc(1); |
| 1951 Variable *Dest = Inst->getDest(); | 2045 Variable *Dest = Inst->getDest(); |
| 1952 // Lowering a = fcmp cond, b, c | 2046 // Lowering a = fcmp cond, b, c |
| 1953 // ucomiss b, c /* only if C1 != Br_None */ | 2047 // ucomiss b, c /* only if C1 != Br_None */ |
| 1954 // /* but swap b,c order if SwapOperands==true */ | 2048 // /* but swap b,c order if SwapOperands==true */ |
| 1955 // mov a, <default> | 2049 // mov a, <default> |
| 1956 // j<C1> label /* only if C1 != Br_None */ | 2050 // j<C1> label /* only if C1 != Br_None */ |
| 1957 // j<C2> label /* only if C2 != Br_None */ | 2051 // j<C2> label /* only if C2 != Br_None */ |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2069 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | 2163 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| 2070 InstX8632Label *Label = InstX8632Label::create(Func, this); | 2164 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 2071 _cmp(Src0New, Src1); | 2165 _cmp(Src0New, Src1); |
| 2072 _mov(Dest, One); | 2166 _mov(Dest, One); |
| 2073 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 2167 _br(getIcmp32Mapping(Inst->getCondition()), Label); |
| 2074 Context.insert(InstFakeUse::create(Func, Dest)); | 2168 Context.insert(InstFakeUse::create(Func, Dest)); |
| 2075 _mov(Dest, Zero); | 2169 _mov(Dest, Zero); |
| 2076 Context.insert(Label); | 2170 Context.insert(Label); |
| 2077 } | 2171 } |
| 2078 | 2172 |
| 2173 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | |
| 2174 Operand *SourceVectOperand = Inst->getSrc(0); | |
| 2175 Operand *ElementToInsert = Inst->getSrc(1); | |
| 2176 ConstantInteger *ElementIndex = | |
| 2177 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); | |
| 2178 // Only constant indices are allowed in PNaCl IR. | |
| 2179 assert(ElementIndex); | |
| 2180 unsigned Index = ElementIndex->getValue(); | |
| 2181 | |
| 2182 Type Ty = SourceVectOperand->getType(); | |
| 2183 Type ElementTy = typeElementType(Ty); | |
| 2184 Type LoweredElementTy = getLoweredElementType(Ty); | |
| 2185 | |
| 2186 if (ElementTy == IceType_i1) { | |
| 2187 // Expand the element to the appropriate size for it to be inserted | |
| 2188 // in the vector. | |
| 2189 Variable *Expanded = | |
| 2190 Func->makeVariable(LoweredElementTy, Context.getNode()); | |
| 2191 InstCast *Cast = | |
| 2192 InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert); | |
| 2193 lowerCast(Cast); | |
| 2194 ElementToInsert = Expanded; | |
| 2195 } | |
| 2196 | |
| 2197 if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | |
| 2198 // Lower insertelement with 32-bit wide elements using shufps. | |
| 2199 // TODO(wala): SSE4.1 has pinsrd and insertps. | |
| 2200 // | |
| 2201 // ALIGNHACK: Force vector operands to registers in instructions that | |
| 2202 // require aligned memory operands until support for stack alignment | |
| 2203 // is implemented. | |
| 2204 #define ALIGN_HACK(Vect) legalizeToVar(Vect) | |
|
jvoung (off chromium)
2014/07/17 19:36:38
Could move ALIGN_HACK down (near the Index < 2), i
wala
2014/07/17 22:14:12
Done.
| |
| 2205 | |
| 2206 Variable *Element = NULL; | |
| 2207 if (LoweredElementTy == IceType_f32) { | |
| 2208 // Element will be in an XMM register since it is floating point. | |
| 2209 Element = legalizeToVar(ElementToInsert); | |
| 2210 } else { | |
| 2211 // Copy an integer to an XMM register. | |
| 2212 Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem); | |
| 2213 Element = makeReg(Ty); | |
| 2214 _movd(Element, T); | |
| 2215 } | |
| 2216 | |
| 2217 // shufps treats the source and destination operands as vectors of | |
| 2218 // four doublewords. The destination's two high doublewords are | |
| 2219 // selected from the source operand and the two low doublewords are | |
| 2220 // selected from the (original value of) the destination operand. | |
| 2221 // An insertelement operation can be effected with a sequence of two | |
| 2222 // shufps operations with appropriate masks. In all cases below, | |
| 2223 // Element[0] is being inserted into SourceVectOperand. Indices are | |
| 2224 // ordered from left to right. | |
| 2225 // | |
| 2226 // insertelement into index 0 (result is stored in Element): | |
| 2227 // Element := Element[0, 0] SourceVectOperand[0, 1] | |
| 2228 // Element := Element[0, 3] SourceVectOperand[2, 3] | |
| 2229 // | |
| 2230 // insertelement into index 1 (result is stored in Element): | |
| 2231 // Element := Element[0, 0] SourceVectOperand[0, 0] | |
| 2232 // Element := Element[3, 0] SourceVectOperand[2, 3] | |
| 2233 // | |
| 2234 // insertelement into index 2 (result is stored in T): | |
| 2235 // T := SourceVectOperand | |
| 2236 // Element := Element[0, 0] T[0, 3] | |
| 2237 // T := T[0, 1] Element[0, 3] | |
| 2238 // | |
| 2239 // insertelement into index 3 (result is stored in T): | |
| 2240 // T := SourceVectOperand | |
| 2241 // Element := Element[0, 0] T[0, 2] | |
| 2242 // T := T[0, 1] Element[3, 0] | |
| 2243 const unsigned char Mask1[4] = {64, 0, 192, 128}; | |
| 2244 const unsigned char Mask2[4] = {236, 227, 196, 52}; | |
| 2245 | |
| 2246 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]); | |
| 2247 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]); | |
| 2248 | |
| 2249 if (Index < 2) { | |
| 2250 SourceVectOperand = ALIGN_HACK(SourceVectOperand); | |
| 2251 _shufps(Element, SourceVectOperand, Mask1Constant); | |
| 2252 _shufps(Element, SourceVectOperand, Mask2Constant); | |
| 2253 _movp(Inst->getDest(), Element); | |
| 2254 } else { | |
| 2255 Variable *T = makeReg(Ty); | |
| 2256 _movp(T, SourceVectOperand); | |
| 2257 _shufps(Element, T, Mask1Constant); | |
| 2258 _shufps(T, Element, Mask2Constant); | |
| 2259 _movp(Inst->getDest(), T); | |
| 2260 } | |
| 2261 #undef ALIGN_HACK | |
| 2262 } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) { | |
| 2263 Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg); | |
| 2264 Variable *T = makeReg(Ty); | |
| 2265 _movp(T, SourceVectOperand); | |
| 2266 _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index)); | |
| 2267 _movp(Inst->getDest(), T); | |
| 2268 } else { | |
| 2269 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | |
| 2270 // Spill the value to a stack slot and perform the insertion in | |
| 2271 // memory. | |
| 2272 // TODO(wala): SSE4.1 has pinsrb. | |
| 2273 // | |
| 2274 // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when | |
| 2275 // support for legalizing to mem is implemented. | |
| 2276 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | |
| 2277 Slot->setWeight(RegWeight::Zero); | |
| 2278 _movp(Slot, legalizeToVar(SourceVectOperand)); | |
| 2279 | |
| 2280 // Compute the location of the position to insert in memory. | |
| 2281 unsigned Offset = Index * typeWidthInBytes(LoweredElementTy); | |
| 2282 OperandX8632Mem *Loc = | |
| 2283 getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset); | |
| 2284 _store(legalizeToVar(ElementToInsert), Loc); | |
| 2285 | |
| 2286 Variable *T = makeReg(Ty); | |
| 2287 _movp(T, Slot); | |
| 2288 _movp(Inst->getDest(), T); | |
| 2289 } | |
| 2290 } | |
| 2291 | |
| 2079 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2292 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| 2080 switch (Instr->getIntrinsicInfo().ID) { | 2293 switch (Instr->getIntrinsicInfo().ID) { |
| 2081 case Intrinsics::AtomicCmpxchg: { | 2294 case Intrinsics::AtomicCmpxchg: { |
| 2082 if (!Intrinsics::VerifyMemoryOrder( | 2295 if (!Intrinsics::VerifyMemoryOrder( |
| 2083 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { | 2296 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { |
| 2084 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | 2297 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); |
| 2085 return; | 2298 return; |
| 2086 } | 2299 } |
| 2087 if (!Intrinsics::VerifyMemoryOrder( | 2300 if (!Intrinsics::VerifyMemoryOrder( |
| 2088 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 2301 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
| (...skipping 911 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3000 // this value is initialized using register operations. | 3213 // this value is initialized using register operations. |
| 3001 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 3214 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 3002 Variable *MinusOne = makeReg(Ty); | 3215 Variable *MinusOne = makeReg(Ty); |
| 3003 // Insert a FakeDef so the live range of MinusOne is not overestimated. | 3216 // Insert a FakeDef so the live range of MinusOne is not overestimated. |
| 3004 Context.insert(InstFakeDef::create(Func, MinusOne)); | 3217 Context.insert(InstFakeDef::create(Func, MinusOne)); |
| 3005 _pcmpeq(MinusOne, MinusOne); | 3218 _pcmpeq(MinusOne, MinusOne); |
| 3006 _psub(Dest, MinusOne); | 3219 _psub(Dest, MinusOne); |
| 3007 return Dest; | 3220 return Dest; |
| 3008 } | 3221 } |
| 3009 | 3222 |
| 3223 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | |
| 3224 Variable *Slot, | |
| 3225 uint32_t Offset) { | |
| 3226 // Ensure that Slot is a stack slot. | |
| 3227 assert(Slot->getWeight() == RegWeight::Zero); | |
|
Jim Stichnoth
2014/07/17 19:49:01
May also want to assert NoRegister.
wala
2014/07/17 22:14:12
Done.
| |
| 3228 // Compute the address of Slot in memory (held in Loc). | |
| 3229 // TODO(wala,stichnot): lea should not be required. The address of | |
| 3230 // the stack slot is known at compile time (although not until after | |
| 3231 // addProlog()). | |
| 3232 const Type PointerType = IceType_i32; | |
| 3233 Variable *Loc = makeReg(PointerType); | |
| 3234 _lea(Loc, Slot); | |
| 3235 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset); | |
| 3236 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | |
| 3237 } | |
| 3238 | |
| 3010 // Helper for legalize() to emit the right code to lower an operand to a | 3239 // Helper for legalize() to emit the right code to lower an operand to a |
| 3011 // register of the appropriate type. | 3240 // register of the appropriate type. |
| 3012 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { | 3241 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { |
| 3013 Type Ty = Src->getType(); | 3242 Type Ty = Src->getType(); |
| 3014 Variable *Reg = makeReg(Ty, RegNum); | 3243 Variable *Reg = makeReg(Ty, RegNum); |
| 3015 if (isVectorType(Ty)) { | 3244 if (isVectorType(Ty)) { |
| 3016 _movp(Reg, Src); | 3245 _movp(Reg, Src); |
| 3017 } else { | 3246 } else { |
| 3018 _mov(Reg, Src); | 3247 _mov(Reg, Src); |
| 3019 } | 3248 } |
| (...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3321 for (SizeT i = 0; i < Size; ++i) { | 3550 for (SizeT i = 0; i < Size; ++i) { |
| 3322 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3551 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 3323 } | 3552 } |
| 3324 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3553 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 3325 } | 3554 } |
| 3326 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3555 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
| 3327 << "\n"; | 3556 << "\n"; |
| 3328 } | 3557 } |
| 3329 | 3558 |
| 3330 } // end of namespace Ice | 3559 } // end of namespace Ice |
| OLD | NEW |