Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 81 | 81 |
| 82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| 83 size_t Index = static_cast<size_t>(Cond); | 83 size_t Index = static_cast<size_t>(Cond); |
| 84 assert(Index < TableIcmp32Size); | 84 assert(Index < TableIcmp32Size); |
| 85 return TableIcmp32[Index].Mapping; | 85 return TableIcmp32[Index].Mapping; |
| 86 } | 86 } |
| 87 | 87 |
| 88 // The maximum number of arguments to pass in XMM registers | 88 // The maximum number of arguments to pass in XMM registers |
| 89 const unsigned X86_MAX_XMM_ARGS = 4; | 89 const unsigned X86_MAX_XMM_ARGS = 4; |
| 90 | 90 |
| 91 // Return a string representation of the type that is suitable for use | |
| 92 // in an identifier. | |
| 93 IceString typeIdentString(const Type Ty) { | |
|
- **Jim Stichnoth** (2014/07/16 19:17:10): Put inside an anonymous namespace
- **wala** (2014/07/17 01:34:53): This is already inside an anonymous namespace.
- **Jim Stichnoth** (2014/07/17 13:03:13): D'oh! Sorry!
| |
| 94 IceString Str; | |
| 95 llvm::raw_string_ostream BaseOS(Str); | |
| 96 Ostream OS(&BaseOS); | |
| 97 if (isVectorType(Ty)) { | |
| 98 OS << "v" << typeNumElements(Ty) << typeElementType(Ty); | |
| 99 } else { | |
| 100 OS << Ty; | |
| 101 } | |
| 102 return BaseOS.str(); | |
| 103 } | |
| 104 | |
| 91 // In some cases, there are x-macros tables for both high-level and | 105 // In some cases, there are x-macros tables for both high-level and |
| 92 // low-level instructions/operands that use the same enum key value. | 106 // low-level instructions/operands that use the same enum key value. |
| 93 // The tables are kept separate to maintain a proper separation | 107 // The tables are kept separate to maintain a proper separation |
| 94 // between abstraction layers. There is a risk that the tables | 108 // between abstraction layers. There is a risk that the tables |
| 95 // could get out of sync if enum values are reordered or if entries | 109 // could get out of sync if enum values are reordered or if entries |
| 96 // are added or deleted. This dummy function uses static_assert to | 110 // are added or deleted. This dummy function uses static_assert to |
| 97 // ensure everything is kept in sync. | 111 // ensure everything is kept in sync. |
| 98 void xMacroIntegrityCheck() { | 112 void xMacroIntegrityCheck() { |
| 99 // Validate the enum values in FCMPX8632_TABLE. | 113 // Validate the enum values in FCMPX8632_TABLE. |
| 100 { | 114 { |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 150 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | 164 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| 151 ICEINSTICMP_TABLE; | 165 ICEINSTICMP_TABLE; |
| 152 #undef X | 166 #undef X |
| 153 } | 167 } |
| 154 | 168 |
| 155 // Validate the enum values in ICETYPEX8632_TABLE. | 169 // Validate the enum values in ICETYPEX8632_TABLE. |
| 156 { | 170 { |
| 157 // Define a temporary set of enum values based on low-level | 171 // Define a temporary set of enum values based on low-level |
| 158 // table entries. | 172 // table entries. |
| 159 enum _tmp_enum { | 173 enum _tmp_enum { |
| 160 #define X(tag, cvt, sdss, width) _tmp_##tag, | 174 #define X(tag, cvt, sdss, pack, width) _tmp_##tag, |
| 161 ICETYPEX8632_TABLE | 175 ICETYPEX8632_TABLE |
| 162 #undef X | 176 #undef X |
| 163 _num | 177 _num |
| 164 }; | 178 }; |
| 165 // Define a set of constants based on high-level table entries. | 179 // Define a set of constants based on high-level table entries. |
| 166 #define X(tag, size, align, elts, elty, str) \ | 180 #define X(tag, size, align, elts, elty, str) \ |
| 167 static const int _table1_##tag = tag; | 181 static const int _table1_##tag = tag; |
| 168 ICETYPE_TABLE; | 182 ICETYPE_TABLE; |
| 169 #undef X | 183 #undef X |
| 170 // Define a set of constants based on low-level table entries, | 184 // Define a set of constants based on low-level table entries, |
| 171 // and ensure the table entry keys are consistent. | 185 // and ensure the table entry keys are consistent. |
| 172 #define X(tag, cvt, sdss, width) \ | 186 #define X(tag, cvt, sdss, pack, width) \ |
| 173 static const int _table2_##tag = _tmp_##tag; \ | 187 static const int _table2_##tag = _tmp_##tag; \ |
| 174 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 188 STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| 175 ICETYPEX8632_TABLE; | 189 ICETYPEX8632_TABLE; |
| 176 #undef X | 190 #undef X |
| 177 // Repeat the static asserts with respect to the high-level | 191 // Repeat the static asserts with respect to the high-level |
| 178 // table entries in case the high-level table has extra entries. | 192 // table entries in case the high-level table has extra entries. |
| 179 #define X(tag, size, align, elts, elty, str) \ | 193 #define X(tag, size, align, elts, elty, str) \ |
| 180 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 194 STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| 181 ICETYPE_TABLE; | 195 ICETYPE_TABLE; |
| 182 #undef X | 196 #undef X |
| (...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1130 } break; | 1144 } break; |
| 1131 case InstArithmetic::Fadd: | 1145 case InstArithmetic::Fadd: |
| 1132 case InstArithmetic::Fsub: | 1146 case InstArithmetic::Fsub: |
| 1133 case InstArithmetic::Fmul: | 1147 case InstArithmetic::Fmul: |
| 1134 case InstArithmetic::Fdiv: | 1148 case InstArithmetic::Fdiv: |
| 1135 case InstArithmetic::Frem: | 1149 case InstArithmetic::Frem: |
| 1136 llvm_unreachable("FP instruction with i64 type"); | 1150 llvm_unreachable("FP instruction with i64 type"); |
| 1137 break; | 1151 break; |
| 1138 } | 1152 } |
| 1139 } else if (isVectorType(Dest->getType())) { | 1153 } else if (isVectorType(Dest->getType())) { |
| 1154 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in | |
| 1155 // registers. This is a workaround of the fact that there is no | |
| 1156 // support for aligning stack operands. Once alignment support is | |
| 1157 // implemented, replace legalizeToVar(Src1) with Src1. | |
|
- **Jim Stichnoth** (2014/07/16 19:17:10): I'm not adamant about this, but I think it might b…
- **wala** (2014/07/17 01:34:54): Good idea. Done.
| |
| 1158 // | |
| 1159 // TODO: Trap on divide and modulo by zero. | |
|
- **Jim Stichnoth** (2014/07/16 19:17:10): I believe this should only trap for integer div/mo…
- **wala** (2014/07/17 01:34:53): Done.
| |
| 1160 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | |
| 1140 switch (Inst->getOp()) { | 1161 switch (Inst->getOp()) { |
| 1141 case InstArithmetic::_num: | 1162 case InstArithmetic::_num: |
| 1142 llvm_unreachable("Unknown arithmetic operator"); | 1163 llvm_unreachable("Unknown arithmetic operator"); |
| 1143 break; | 1164 break; |
| 1144 case InstArithmetic::Add: | 1165 case InstArithmetic::Add: { |
| 1145 case InstArithmetic::And: | 1166 Variable *T = makeReg(Dest->getType()); |
| 1146 case InstArithmetic::Or: | 1167 _movp(T, Src0); |
| 1147 case InstArithmetic::Xor: | 1168 _padd(T, legalizeToVar(Src1)); |
| 1148 case InstArithmetic::Sub: | 1169 _movp(Dest, T); |
| 1149 case InstArithmetic::Mul: | 1170 } break; |
| 1150 case InstArithmetic::Shl: | 1171 case InstArithmetic::And: { |
| 1151 case InstArithmetic::Lshr: | 1172 Variable *T = makeReg(Dest->getType()); |
| 1152 case InstArithmetic::Ashr: | 1173 _movp(T, Src0); |
| 1153 case InstArithmetic::Udiv: | 1174 _pand(T, legalizeToVar(Src1)); |
| 1154 case InstArithmetic::Sdiv: | 1175 _movp(Dest, T); |
| 1155 case InstArithmetic::Urem: | 1176 } break; |
| 1156 case InstArithmetic::Srem: | 1177 case InstArithmetic::Or: { |
| 1157 // TODO(wala): Handle these. | 1178 Variable *T = makeReg(Dest->getType()); |
| 1158 Func->setError("Unhandled instruction"); | 1179 _movp(T, Src0); |
| 1159 break; | 1180 _por(T, legalizeToVar(Src1)); |
| 1181 _movp(Dest, T); | |
| 1182 } break; | |
| 1183 case InstArithmetic::Xor: { | |
| 1184 Variable *T = makeReg(Dest->getType()); | |
| 1185 _movp(T, Src0); | |
| 1186 _pxor(T, legalizeToVar(Src1)); | |
| 1187 _movp(Dest, T); | |
| 1188 } break; | |
| 1189 case InstArithmetic::Sub: { | |
| 1190 Variable *T = makeReg(Dest->getType()); | |
| 1191 _movp(T, Src0); | |
| 1192 _psub(T, legalizeToVar(Src1)); | |
| 1193 _movp(Dest, T); | |
| 1194 } break; | |
| 1195 case InstArithmetic::Mul: { | |
| 1196 if (Dest->getType() == IceType_v4i32) { | |
| 1197 // Lowering sequence: | |
| 1198 // movups T1, Src0 | |
| 1199 // pshufd T2, Src0, [1,0,3,0] | |
|
- **jvoung (off chromium)** (2014/07/16 19:23:26): nit: The ordering of the vector ([1, 0, 3, 0]) is…
- **wala** (2014/07/17 01:34:53): I write vectors as if they were arrays in the comm…
- **jvoung (off chromium)** (2014/07/17 15:00:38): Okay, that helps some.
| |
| 1200 // pshufd T3, Src1, [1,0,3,0] | |
| 1201 // # T1 = { Src0[0] * Src1[0], Src0[2] * Src1[2] } | |
| 1202 // pmuludq T1, Src1 | |
| 1203 // # T2 = { Src0[1] * Src1[1], Src0[3] * Src1[3] } | |
| 1204 // pmuludq T2, T3 | |
| 1205 // # T1 = { lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2]) } | |
| 1206 // shufps T1, T2, [0,2,0,2] | |
| 1207 // pshufd T4, T1, [0,2,1,3] | |
| 1208 // movups Dest, T4 | |
| 1209 // | |
| 1210 // TODO(wala): SSE4.1 has pmulld. | |
| 1211 | |
| 1212 // Mask that directs pshufd to create a vector with entries | |
| 1213 // Src[1, 0, 3, 0] | |
| 1214 const unsigned Constant1030 = 0x31; | |
| 1215 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030); | |
| 1216 // Mask that directs shufps to create a vector with entries | |
| 1217 // Dest[0, 2], Src[0, 2] | |
| 1218 const unsigned Mask0202 = 0x88; | |
| 1219 // Mask that directs pshufd to create a vector with entries | |
| 1220 // Src[0, 2, 1, 3] | |
| 1221 const unsigned Mask0213 = 0xd8; | |
| 1222 Variable *T1 = makeReg(IceType_v4i32); | |
| 1223 Variable *T2 = makeReg(IceType_v4i32); | |
| 1224 Variable *T3 = makeReg(IceType_v4i32); | |
| 1225 Variable *T4 = makeReg(IceType_v4i32); | |
| 1226 _movp(T1, Src0); | |
| 1227 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R | |
| 1228 // with Src1 after stack operand alignment support is | |
| 1229 // implemented. | |
| 1230 Variable *Src0R = legalizeToVar(Src0); | |
| 1231 Variable *Src1R = legalizeToVar(Src1); | |
| 1232 _pshufd(T2, Src0R, Mask1030); | |
| 1233 _pshufd(T3, Src1R, Mask1030); | |
| 1234 _pmuludq(T1, Src1R); | |
| 1235 _pmuludq(T2, T3); | |
| 1236 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); | |
| 1237 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); | |
| 1238 _movp(Dest, T4); | |
| 1239 } else if (Dest->getType() == IceType_v8i16) { | |
| 1240 Variable *T = makeReg(IceType_v8i16); | |
| 1241 _movp(T, Src0); | |
| 1242 _pmullw(T, legalizeToVar(Src1)); | |
| 1243 _movp(Dest, T); | |
| 1244 } else { | |
| 1245 assert(Dest->getType() == IceType_v16i8); | |
| 1246 // Sz_mul_v16i8 | |
| 1247 const IceString Helper = "Sz_mul_v16i8"; | |
| 1248 const SizeT MaxSrcs = 2; | |
| 1249 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1250 Call->addArg(Src0); | |
| 1251 Call->addArg(Src1); | |
| 1252 lowerCall(Call); | |
| 1253 } | |
| 1254 } break; | |
| 1255 case InstArithmetic::Shl: { | |
| 1256 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 | |
| 1257 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); | |
| 1258 const SizeT MaxSrcs = 2; | |
| 1259 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1260 Call->addArg(Src0); | |
| 1261 Call->addArg(Src1); | |
| 1262 lowerCall(Call); | |
| 1263 } break; | |
| 1264 case InstArithmetic::Lshr: { | |
| 1265 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 | |
| 1266 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); | |
| 1267 const SizeT MaxSrcs = 2; | |
| 1268 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1269 Call->addArg(Src0); | |
| 1270 Call->addArg(Src1); | |
| 1271 lowerCall(Call); | |
| 1272 } break; | |
| 1273 case InstArithmetic::Ashr: { | |
| 1274 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8 | |
| 1275 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType()); | |
| 1276 const SizeT MaxSrcs = 2; | |
| 1277 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1278 Call->addArg(Src0); | |
| 1279 Call->addArg(Src1); | |
| 1280 lowerCall(Call); | |
| 1281 } break; | |
| 1282 case InstArithmetic::Udiv: { | |
| 1283 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8 | |
| 1284 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType()); | |
| 1285 const SizeT MaxSrcs = 2; | |
| 1286 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1287 Call->addArg(Src0); | |
| 1288 Call->addArg(Src1); | |
| 1289 lowerCall(Call); | |
| 1290 } break; | |
| 1291 case InstArithmetic::Sdiv: { | |
| 1292 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8 | |
| 1293 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType()); | |
| 1294 const SizeT MaxSrcs = 2; | |
| 1295 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1296 Call->addArg(Src0); | |
| 1297 Call->addArg(Src1); | |
| 1298 lowerCall(Call); | |
| 1299 } break; | |
| 1300 case InstArithmetic::Urem: { | |
| 1301 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8 | |
| 1302 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType()); | |
| 1303 const SizeT MaxSrcs = 2; | |
| 1304 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1305 Call->addArg(Src0); | |
| 1306 Call->addArg(Src1); | |
| 1307 lowerCall(Call); | |
| 1308 } break; | |
| 1309 case InstArithmetic::Srem: { | |
| 1310 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8 | |
| 1311 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType()); | |
| 1312 const SizeT MaxSrcs = 2; | |
| 1313 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1314 Call->addArg(Src0); | |
| 1315 Call->addArg(Src1); | |
| 1316 lowerCall(Call); | |
| 1317 } break; | |
| 1160 case InstArithmetic::Fadd: { | 1318 case InstArithmetic::Fadd: { |
| 1161 Variable *T = makeReg(Dest->getType()); | 1319 Variable *T = makeReg(Dest->getType()); |
| 1162 _movp(T, Src0); | 1320 _movp(T, Src0); |
| 1163 _addps(T, Src1); | 1321 _addps(T, legalizeToVar(Src1)); |
| 1164 _movp(Dest, T); | 1322 _movp(Dest, T); |
| 1165 } break; | 1323 } break; |
| 1166 case InstArithmetic::Fsub: { | 1324 case InstArithmetic::Fsub: { |
| 1167 Variable *T = makeReg(Dest->getType()); | 1325 Variable *T = makeReg(Dest->getType()); |
| 1168 _movp(T, Src0); | 1326 _movp(T, Src0); |
| 1169 _subps(T, Src1); | 1327 _subps(T, legalizeToVar(Src1)); |
| 1170 _movp(Dest, T); | 1328 _movp(Dest, T); |
| 1171 } break; | 1329 } break; |
| 1172 case InstArithmetic::Fmul: { | 1330 case InstArithmetic::Fmul: { |
| 1173 Variable *T = makeReg(Dest->getType()); | 1331 Variable *T = makeReg(Dest->getType()); |
| 1174 _movp(T, Src0); | 1332 _movp(T, Src0); |
| 1175 _mulps(T, Src1); | 1333 _mulps(T, legalizeToVar(Src1)); |
| 1176 _movp(Dest, T); | 1334 _movp(Dest, T); |
| 1177 } break; | 1335 } break; |
| 1178 case InstArithmetic::Fdiv: { | 1336 case InstArithmetic::Fdiv: { |
| 1179 Variable *T = makeReg(Dest->getType()); | 1337 Variable *T = makeReg(Dest->getType()); |
| 1180 _movp(T, Src0); | 1338 _movp(T, Src0); |
| 1181 _divps(T, Src1); | 1339 _divps(T, legalizeToVar(Src1)); |
| 1182 _movp(Dest, T); | 1340 _movp(Dest, T); |
| 1183 } break; | 1341 } break; |
| 1184 case InstArithmetic::Frem: { | 1342 case InstArithmetic::Frem: { |
| 1185 const SizeT MaxSrcs = 2; | 1343 const SizeT MaxSrcs = 2; |
| 1186 InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs); | 1344 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs); |
| 1187 Call->addArg(Src0); | 1345 Call->addArg(Src0); |
| 1188 Call->addArg(Src1); | 1346 Call->addArg(Src1); |
| 1189 lowerCall(Call); | 1347 lowerCall(Call); |
| 1190 } break; | 1348 } break; |
| 1191 } | 1349 } |
| 1192 } else { // Dest->getType() is non-i64 scalar | 1350 } else { // Dest->getType() is non-i64 scalar |
| 1193 Variable *T_edx = NULL; | 1351 Variable *T_edx = NULL; |
| 1194 Variable *T = NULL; | 1352 Variable *T = NULL; |
| 1195 switch (Inst->getOp()) { | 1353 switch (Inst->getOp()) { |
| 1196 case InstArithmetic::_num: | 1354 case InstArithmetic::_num: |
| (...skipping 1873 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3070 for (SizeT i = 0; i < Size; ++i) { | 3228 for (SizeT i = 0; i < Size; ++i) { |
| 3071 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3229 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 3072 } | 3230 } |
| 3073 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3231 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 3074 } | 3232 } |
| 3075 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3233 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
| 3076 << "\n"; | 3234 << "\n"; |
| 3077 } | 3235 } |
| 3078 | 3236 |
| 3079 } // end of namespace Ice | 3237 } // end of namespace Ice |
| OLD | NEW |