OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
81 | 81 |
82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
83 size_t Index = static_cast<size_t>(Cond); | 83 size_t Index = static_cast<size_t>(Cond); |
84 assert(Index < TableIcmp32Size); | 84 assert(Index < TableIcmp32Size); |
85 return TableIcmp32[Index].Mapping; | 85 return TableIcmp32[Index].Mapping; |
86 } | 86 } |
87 | 87 |
88 // The maximum number of arguments to pass in XMM registers | 88 // The maximum number of arguments to pass in XMM registers |
89 const unsigned X86_MAX_XMM_ARGS = 4; | 89 const unsigned X86_MAX_XMM_ARGS = 4; |
90 | 90 |
91 // Return a string representation of the type that is suitable for use | |
92 // in an identifier. | |
93 IceString typeIdentString(const Type Ty) { | |
Jim Stichnoth
2014/07/16 19:17:10
Put inside an anonymous namespace
wala
2014/07/17 01:34:53
This is already inside an anonymous namespace.
Jim Stichnoth
2014/07/17 13:03:13
D'oh! Sorry!
| |
94 IceString Str; | |
95 llvm::raw_string_ostream BaseOS(Str); | |
96 Ostream OS(&BaseOS); | |
97 if (isVectorType(Ty)) { | |
98 OS << "v" << typeNumElements(Ty) << typeElementType(Ty); | |
99 } else { | |
100 OS << Ty; | |
101 } | |
102 return BaseOS.str(); | |
103 } | |
104 | |
91 // In some cases, there are x-macro tables for both high-level and | 105 // In some cases, there are x-macro tables for both high-level and |
92 // low-level instructions/operands that use the same enum key value. | 106 // low-level instructions/operands that use the same enum key value. |
93 // The tables are kept separate to maintain a proper separation | 107 // The tables are kept separate to maintain a proper separation |
94 // between abstraction layers. There is a risk that the tables | 108 // between abstraction layers. There is a risk that the tables |
95 // could get out of sync if enum values are reordered or if entries | 109 // could get out of sync if enum values are reordered or if entries |
96 // are added or deleted. This dummy function uses static_assert to | 110 // are added or deleted. This dummy function uses static_assert to |
97 // ensure everything is kept in sync. | 111 // ensure everything is kept in sync. |
98 void xMacroIntegrityCheck() { | 112 void xMacroIntegrityCheck() { |
99 // Validate the enum values in FCMPX8632_TABLE. | 113 // Validate the enum values in FCMPX8632_TABLE. |
100 { | 114 { |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
150 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 164 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
151 ICEINSTICMP_TABLE; | 165 ICEINSTICMP_TABLE; |
152 #undef X | 166 #undef X |
153 } | 167 } |
154 | 168 |
155 // Validate the enum values in ICETYPEX8632_TABLE. | 169 // Validate the enum values in ICETYPEX8632_TABLE. |
156 { | 170 { |
157 // Define a temporary set of enum values based on low-level | 171 // Define a temporary set of enum values based on low-level |
158 // table entries. | 172 // table entries. |
159 enum _tmp_enum { | 173 enum _tmp_enum { |
160 #define X(tag, cvt, sdss, width) _tmp_##tag, | 174 #define X(tag, cvt, sdss, pack, width) _tmp_##tag, |
161 ICETYPEX8632_TABLE | 175 ICETYPEX8632_TABLE |
162 #undef X | 176 #undef X |
163 _num | 177 _num |
164 }; | 178 }; |
165 // Define a set of constants based on high-level table entries. | 179 // Define a set of constants based on high-level table entries. |
166 #define X(tag, size, align, elts, elty, str) \ | 180 #define X(tag, size, align, elts, elty, str) \ |
167 static const int _table1_##tag = tag; | 181 static const int _table1_##tag = tag; |
168 ICETYPE_TABLE; | 182 ICETYPE_TABLE; |
169 #undef X | 183 #undef X |
170 // Define a set of constants based on low-level table entries, | 184 // Define a set of constants based on low-level table entries, |
171 // and ensure the table entry keys are consistent. | 185 // and ensure the table entry keys are consistent. |
172 #define X(tag, cvt, sdss, width) \ | 186 #define X(tag, cvt, sdss, pack, width) \ |
173 static const int _table2_##tag = _tmp_##tag; \ | 187 static const int _table2_##tag = _tmp_##tag; \ |
174 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 188 STATIC_ASSERT(_table1_##tag == _table2_##tag); |
175 ICETYPEX8632_TABLE; | 189 ICETYPEX8632_TABLE; |
176 #undef X | 190 #undef X |
177 // Repeat the static asserts with respect to the high-level | 191 // Repeat the static asserts with respect to the high-level |
178 // table entries in case the high-level table has extra entries. | 192 // table entries in case the high-level table has extra entries. |
179 #define X(tag, size, align, elts, elty, str) \ | 193 #define X(tag, size, align, elts, elty, str) \ |
180 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 194 STATIC_ASSERT(_table1_##tag == _table2_##tag); |
181 ICETYPE_TABLE; | 195 ICETYPE_TABLE; |
182 #undef X | 196 #undef X |
(...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1130 } break; | 1144 } break; |
1131 case InstArithmetic::Fadd: | 1145 case InstArithmetic::Fadd: |
1132 case InstArithmetic::Fsub: | 1146 case InstArithmetic::Fsub: |
1133 case InstArithmetic::Fmul: | 1147 case InstArithmetic::Fmul: |
1134 case InstArithmetic::Fdiv: | 1148 case InstArithmetic::Fdiv: |
1135 case InstArithmetic::Frem: | 1149 case InstArithmetic::Frem: |
1136 llvm_unreachable("FP instruction with i64 type"); | 1150 llvm_unreachable("FP instruction with i64 type"); |
1137 break; | 1151 break; |
1138 } | 1152 } |
1139 } else if (isVectorType(Dest->getType())) { | 1153 } else if (isVectorType(Dest->getType())) { |
1154 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in | |
1155 // registers. This is a workaround for the fact that there is no | |
1156 // support for aligning stack operands. Once alignment support is | |
1157 // implemented, replace legalizeToVar(Src1) with Src1. | |
Jim Stichnoth
2014/07/16 19:17:10
I'm not adamant about this, but I think it might b
wala
2014/07/17 01:34:54
Good idea. Done.
| |
1158 // | |
1159 // TODO: Trap on divide and modulo by zero. | |
Jim Stichnoth
2014/07/16 19:17:10
I believe this should only trap for integer div/mo
wala
2014/07/17 01:34:53
Done.
| |
1160 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | |
1140 switch (Inst->getOp()) { | 1161 switch (Inst->getOp()) { |
1141 case InstArithmetic::_num: | 1162 case InstArithmetic::_num: |
1142 llvm_unreachable("Unknown arithmetic operator"); | 1163 llvm_unreachable("Unknown arithmetic operator"); |
1143 break; | 1164 break; |
1144 case InstArithmetic::Add: | 1165 case InstArithmetic::Add: { |
1145 case InstArithmetic::And: | 1166 Variable *T = makeReg(Dest->getType()); |
1146 case InstArithmetic::Or: | 1167 _movp(T, Src0); |
1147 case InstArithmetic::Xor: | 1168 _padd(T, legalizeToVar(Src1)); |
1148 case InstArithmetic::Sub: | 1169 _movp(Dest, T); |
1149 case InstArithmetic::Mul: | 1170 } break; |
1150 case InstArithmetic::Shl: | 1171 case InstArithmetic::And: { |
1151 case InstArithmetic::Lshr: | 1172 Variable *T = makeReg(Dest->getType()); |
1152 case InstArithmetic::Ashr: | 1173 _movp(T, Src0); |
1153 case InstArithmetic::Udiv: | 1174 _pand(T, legalizeToVar(Src1)); |
1154 case InstArithmetic::Sdiv: | 1175 _movp(Dest, T); |
1155 case InstArithmetic::Urem: | 1176 } break; |
1156 case InstArithmetic::Srem: | 1177 case InstArithmetic::Or: { |
1157 // TODO(wala): Handle these. | 1178 Variable *T = makeReg(Dest->getType()); |
1158 Func->setError("Unhandled instruction"); | 1179 _movp(T, Src0); |
1159 break; | 1180 _por(T, legalizeToVar(Src1)); |
1181 _movp(Dest, T); | |
1182 } break; | |
1183 case InstArithmetic::Xor: { | |
1184 Variable *T = makeReg(Dest->getType()); | |
1185 _movp(T, Src0); | |
1186 _pxor(T, legalizeToVar(Src1)); | |
1187 _movp(Dest, T); | |
1188 } break; | |
1189 case InstArithmetic::Sub: { | |
1190 Variable *T = makeReg(Dest->getType()); | |
1191 _movp(T, Src0); | |
1192 _psub(T, legalizeToVar(Src1)); | |
1193 _movp(Dest, T); | |
1194 } break; | |
1195 case InstArithmetic::Mul: { | |
1196 if (Dest->getType() == IceType_v4i32) { | |
1197 // Lowering sequence: | |
1198 // movups T1, Src0 | |
1199 // pshufd T2, Src0, [1,0,3,0] | |
jvoung (off chromium)
2014/07/16 19:23:26
nit: The ordering of the vector ([1, 0, 3, 0]) is
wala
2014/07/17 01:34:53
I write vectors as if they were arrays in the comm
jvoung (off chromium)
2014/07/17 15:00:38
Okay, that helps some.
| |
1200 // pshufd T3, Src1, [1,0,3,0] | |
1201 // # T1 = { Src0[0] * Src1[0], Src0[2] * Src1[2] } | |
1202 // pmuludq T1, Src1 | |
1203 // # T2 = { Src0[1] * Src1[1], Src0[3] * Src1[3] } | |
1204 // pmuludq T2, T3 | |
1205 // # T1 = { lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2]) } | |
1206 // shufps T1, T2, [0,2,0,2] | |
1207 // pshufd T4, T1, [0,2,1,3] | |
1208 // movups Dest, T4 | |
1209 // | |
1210 // TODO(wala): SSE4.1 has pmulld. | |
1211 | |
1212 // Mask that directs pshufd to create a vector with entries | |
1213 // Src[1, 0, 3, 0] | |
1214 const unsigned Constant1030 = 0x31; | |
1215 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030); | |
1216 // Mask that directs shufps to create a vector with entries | |
1217 // Dest[0, 2], Src[0, 2] | |
1218 const unsigned Mask0202 = 0x88; | |
1219 // Mask that directs pshufd to create a vector with entries | |
1220 // Src[0, 2, 1, 3] | |
1221 const unsigned Mask0213 = 0xd8; | |
1222 Variable *T1 = makeReg(IceType_v4i32); | |
1223 Variable *T2 = makeReg(IceType_v4i32); | |
1224 Variable *T3 = makeReg(IceType_v4i32); | |
1225 Variable *T4 = makeReg(IceType_v4i32); | |
1226 _movp(T1, Src0); | |
1227 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R | |
1228 // with Src1 after stack operand alignment support is | |
1229 // implemented. | |
1230 Variable *Src0R = legalizeToVar(Src0); | |
1231 Variable *Src1R = legalizeToVar(Src1); | |
1232 _pshufd(T2, Src0R, Mask1030); | |
1233 _pshufd(T3, Src1R, Mask1030); | |
1234 _pmuludq(T1, Src1R); | |
1235 _pmuludq(T2, T3); | |
1236 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); | |
1237 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); | |
1238 _movp(Dest, T4); | |
1239 } else if (Dest->getType() == IceType_v8i16) { | |
1240 Variable *T = makeReg(IceType_v8i16); | |
1241 _movp(T, Src0); | |
1242 _pmullw(T, legalizeToVar(Src1)); | |
1243 _movp(Dest, T); | |
1244 } else { | |
1245 assert(Dest->getType() == IceType_v16i8); | |
1246 // Sz_mul_v16i8 | |
1247 const IceString Helper = "Sz_mul_v16i8"; | |
1248 const SizeT MaxSrcs = 2; | |
1249 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1250 Call->addArg(Src0); | |
1251 Call->addArg(Src1); | |
1252 lowerCall(Call); | |
1253 } | |
1254 } break; | |
1255 case InstArithmetic::Shl: { | |
1256 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 | |
1257 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); | |
1258 const SizeT MaxSrcs = 2; | |
1259 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1260 Call->addArg(Src0); | |
1261 Call->addArg(Src1); | |
1262 lowerCall(Call); | |
1263 } break; | |
1264 case InstArithmetic::Lshr: { | |
1265 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 | |
1266 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); | |
1267 const SizeT MaxSrcs = 2; | |
1268 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1269 Call->addArg(Src0); | |
1270 Call->addArg(Src1); | |
1271 lowerCall(Call); | |
1272 } break; | |
1273 case InstArithmetic::Ashr: { | |
1274 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8 | |
1275 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType()); | |
1276 const SizeT MaxSrcs = 2; | |
1277 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1278 Call->addArg(Src0); | |
1279 Call->addArg(Src1); | |
1280 lowerCall(Call); | |
1281 } break; | |
1282 case InstArithmetic::Udiv: { | |
1283 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8 | |
1284 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType()); | |
1285 const SizeT MaxSrcs = 2; | |
1286 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1287 Call->addArg(Src0); | |
1288 Call->addArg(Src1); | |
1289 lowerCall(Call); | |
1290 } break; | |
1291 case InstArithmetic::Sdiv: { | |
1292 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8 | |
1293 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType()); | |
1294 const SizeT MaxSrcs = 2; | |
1295 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1296 Call->addArg(Src0); | |
1297 Call->addArg(Src1); | |
1298 lowerCall(Call); | |
1299 } break; | |
1300 case InstArithmetic::Urem: { | |
1301 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8 | |
1302 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType()); | |
1303 const SizeT MaxSrcs = 2; | |
1304 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1305 Call->addArg(Src0); | |
1306 Call->addArg(Src1); | |
1307 lowerCall(Call); | |
1308 } break; | |
1309 case InstArithmetic::Srem: { | |
1310 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8 | |
1311 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType()); | |
1312 const SizeT MaxSrcs = 2; | |
1313 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1314 Call->addArg(Src0); | |
1315 Call->addArg(Src1); | |
1316 lowerCall(Call); | |
1317 } break; | |
1160 case InstArithmetic::Fadd: { | 1318 case InstArithmetic::Fadd: { |
1161 Variable *T = makeReg(Dest->getType()); | 1319 Variable *T = makeReg(Dest->getType()); |
1162 _movp(T, Src0); | 1320 _movp(T, Src0); |
1163 _addps(T, Src1); | 1321 _addps(T, legalizeToVar(Src1)); |
1164 _movp(Dest, T); | 1322 _movp(Dest, T); |
1165 } break; | 1323 } break; |
1166 case InstArithmetic::Fsub: { | 1324 case InstArithmetic::Fsub: { |
1167 Variable *T = makeReg(Dest->getType()); | 1325 Variable *T = makeReg(Dest->getType()); |
1168 _movp(T, Src0); | 1326 _movp(T, Src0); |
1169 _subps(T, Src1); | 1327 _subps(T, legalizeToVar(Src1)); |
1170 _movp(Dest, T); | 1328 _movp(Dest, T); |
1171 } break; | 1329 } break; |
1172 case InstArithmetic::Fmul: { | 1330 case InstArithmetic::Fmul: { |
1173 Variable *T = makeReg(Dest->getType()); | 1331 Variable *T = makeReg(Dest->getType()); |
1174 _movp(T, Src0); | 1332 _movp(T, Src0); |
1175 _mulps(T, Src1); | 1333 _mulps(T, legalizeToVar(Src1)); |
1176 _movp(Dest, T); | 1334 _movp(Dest, T); |
1177 } break; | 1335 } break; |
1178 case InstArithmetic::Fdiv: { | 1336 case InstArithmetic::Fdiv: { |
1179 Variable *T = makeReg(Dest->getType()); | 1337 Variable *T = makeReg(Dest->getType()); |
1180 _movp(T, Src0); | 1338 _movp(T, Src0); |
1181 _divps(T, Src1); | 1339 _divps(T, legalizeToVar(Src1)); |
1182 _movp(Dest, T); | 1340 _movp(Dest, T); |
1183 } break; | 1341 } break; |
1184 case InstArithmetic::Frem: { | 1342 case InstArithmetic::Frem: { |
1185 const SizeT MaxSrcs = 2; | 1343 const SizeT MaxSrcs = 2; |
1186 InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs); | 1344 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs); |
1187 Call->addArg(Src0); | 1345 Call->addArg(Src0); |
1188 Call->addArg(Src1); | 1346 Call->addArg(Src1); |
1189 lowerCall(Call); | 1347 lowerCall(Call); |
1190 } break; | 1348 } break; |
1191 } | 1349 } |
1192 } else { // Dest->getType() is non-i64 scalar | 1350 } else { // Dest->getType() is non-i64 scalar |
1193 Variable *T_edx = NULL; | 1351 Variable *T_edx = NULL; |
1194 Variable *T = NULL; | 1352 Variable *T = NULL; |
1195 switch (Inst->getOp()) { | 1353 switch (Inst->getOp()) { |
1196 case InstArithmetic::_num: | 1354 case InstArithmetic::_num: |
(...skipping 1873 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3070 for (SizeT i = 0; i < Size; ++i) { | 3228 for (SizeT i = 0; i < Size; ++i) { |
3071 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3229 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3072 } | 3230 } |
3073 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3231 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3074 } | 3232 } |
3075 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3233 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3076 << "\n"; | 3234 << "\n"; |
3077 } | 3235 } |
3078 | 3236 |
3079 } // end of namespace Ice | 3237 } // end of namespace Ice |
OLD | NEW |