OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
83 size_t Index = static_cast<size_t>(Cond); | 83 size_t Index = static_cast<size_t>(Cond); |
84 assert(Index < TableIcmp32Size); | 84 assert(Index < TableIcmp32Size); |
85 return TableIcmp32[Index].Mapping; | 85 return TableIcmp32[Index].Mapping; |
86 } | 86 } |
87 | 87 |
88 // The maximum number of arguments to pass in XMM registers | 88 // The maximum number of arguments to pass in XMM registers |
89 const unsigned X86_MAX_XMM_ARGS = 4; | 89 const unsigned X86_MAX_XMM_ARGS = 4; |
90 // The number of bits in a byte | 90 // The number of bits in a byte |
91 const unsigned X86_CHAR_BIT = 8; | 91 const unsigned X86_CHAR_BIT = 8; |
92 | 92 |
| 93 // Return a string representation of the type that is suitable for use |
| 94 // in an identifier: a vector type is written as "v", then its element count, then its element type (e.g. the v4i32 in Sz_shl_v4i32); any other type is written using its ordinary stream output. |
| 95 IceString typeIdentString(const Type Ty) { |
| 96 IceString Str; |
| 97 llvm::raw_string_ostream BaseOS(Str); |
| 98 Ostream OS(&BaseOS); |
| 99 if (isVectorType(Ty)) { |
| 100 OS << "v" << typeNumElements(Ty) << typeElementType(Ty); |
| 101 } else { |
| 102 OS << Ty; |
| 103 } |
| 104 return BaseOS.str(); |
| 105 } |
| 106 |
93 // In some cases, there are x-macros tables for both high-level and | 107 // In some cases, there are x-macros tables for both high-level and |
94 // low-level instructions/operands that use the same enum key value. | 108 // low-level instructions/operands that use the same enum key value. |
95 // The tables are kept separate to maintain a proper separation | 109 // The tables are kept separate to maintain a proper separation |
96 // between abstraction layers. There is a risk that the tables | 110 // between abstraction layers. There is a risk that the tables |
97 // could get out of sync if enum values are reordered or if entries | 111 // could get out of sync if enum values are reordered or if entries |
98 // are added or deleted. This dummy function uses static_assert to | 112 // are added or deleted. This dummy function uses static_assert to |
99 // ensure everything is kept in sync. | 113 // ensure everything is kept in sync. |
100 void xMacroIntegrityCheck() { | 114 void xMacroIntegrityCheck() { |
101 // Validate the enum values in FCMPX8632_TABLE. | 115 // Validate the enum values in FCMPX8632_TABLE. |
102 { | 116 { |
(...skipping 1029 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1132 } break; | 1146 } break; |
1133 case InstArithmetic::Fadd: | 1147 case InstArithmetic::Fadd: |
1134 case InstArithmetic::Fsub: | 1148 case InstArithmetic::Fsub: |
1135 case InstArithmetic::Fmul: | 1149 case InstArithmetic::Fmul: |
1136 case InstArithmetic::Fdiv: | 1150 case InstArithmetic::Fdiv: |
1137 case InstArithmetic::Frem: | 1151 case InstArithmetic::Frem: |
1138 llvm_unreachable("FP instruction with i64 type"); | 1152 llvm_unreachable("FP instruction with i64 type"); |
1139 break; | 1153 break; |
1140 } | 1154 } |
1141 } else if (isVectorType(Dest->getType())) { | 1155 } else if (isVectorType(Dest->getType())) { |
| 1156 // TODO: Trap on integer divide and integer modulo by zero. |
| 1157 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| 1158 // |
| 1159 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in |
| 1160 // registers. This is a workaround of the fact that there is no |
| 1161 // support for aligning stack operands. Once there is support, |
| 1162 // remove LEGAL_HACK. |
| 1163 #define LEGAL_HACK(s) legalizeToVar((s)) |
1142 switch (Inst->getOp()) { | 1164 switch (Inst->getOp()) { |
1143 case InstArithmetic::_num: | 1165 case InstArithmetic::_num: |
1144 llvm_unreachable("Unknown arithmetic operator"); | 1166 llvm_unreachable("Unknown arithmetic operator"); |
1145 break; | 1167 break; |
1146 case InstArithmetic::Add: | 1168 case InstArithmetic::Add: { |
1147 case InstArithmetic::And: | 1169 Variable *T = makeReg(Dest->getType()); |
1148 case InstArithmetic::Or: | 1170 _movp(T, Src0); |
1149 case InstArithmetic::Xor: | 1171 _padd(T, LEGAL_HACK(Src1)); |
1150 case InstArithmetic::Sub: | 1172 _movp(Dest, T); |
1151 case InstArithmetic::Mul: | 1173 } break; |
1152 case InstArithmetic::Shl: | 1174 case InstArithmetic::And: { |
1153 case InstArithmetic::Lshr: | 1175 Variable *T = makeReg(Dest->getType()); |
1154 case InstArithmetic::Ashr: | 1176 _movp(T, Src0); |
1155 case InstArithmetic::Udiv: | 1177 _pand(T, LEGAL_HACK(Src1)); |
1156 case InstArithmetic::Sdiv: | 1178 _movp(Dest, T); |
1157 case InstArithmetic::Urem: | 1179 } break; |
1158 case InstArithmetic::Srem: | 1180 case InstArithmetic::Or: { |
1159 // TODO(wala): Handle these. | 1181 Variable *T = makeReg(Dest->getType()); |
1160 Func->setError("Unhandled instruction"); | 1182 _movp(T, Src0); |
1161 break; | 1183 _por(T, LEGAL_HACK(Src1)); |
| 1184 _movp(Dest, T); |
| 1185 } break; |
| 1186 case InstArithmetic::Xor: { |
| 1187 Variable *T = makeReg(Dest->getType()); |
| 1188 _movp(T, Src0); |
| 1189 _pxor(T, LEGAL_HACK(Src1)); |
| 1190 _movp(Dest, T); |
| 1191 } break; |
| 1192 case InstArithmetic::Sub: { |
| 1193 Variable *T = makeReg(Dest->getType()); |
| 1194 _movp(T, Src0); |
| 1195 _psub(T, LEGAL_HACK(Src1)); |
| 1196 _movp(Dest, T); |
| 1197 } break; |
| 1198 case InstArithmetic::Mul: { |
| 1199 if (Dest->getType() == IceType_v4i32) { |
| 1200 // Lowering sequence: |
| 1201 // Note: The mask arguments have index 0 on the left. |
| 1202 // |
| 1203 // movups T1, Src0 |
| 1204 // pshufd T2, Src0, {1,0,3,0} |
| 1205 // pshufd T3, Src1, {1,0,3,0} |
| 1206 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} |
| 1207 // pmuludq T1, Src1 |
| 1208 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} |
| 1209 // pmuludq T2, T3 |
| 1210 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} |
| 1211 // shufps T1, T2, {0,2,0,2} |
| 1212 // pshufd T4, T1, {0,2,1,3} |
| 1213 // movups Dest, T4 |
| 1214 // |
| 1215 // TODO(wala): SSE4.1 has pmulld. |
| 1216 |
| 1217 // Mask that directs pshufd to create a vector with entries |
| 1218 // Src[1, 0, 3, 0] |
| 1219 const unsigned Constant1030 = 0x31; |
| 1220 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030); |
| 1221 // Mask that directs shufps to create a vector with entries |
| 1222 // Dest[0, 2], Src[0, 2] |
| 1223 const unsigned Mask0202 = 0x88; |
| 1224 // Mask that directs pshufd to create a vector with entries |
| 1225 // Src[0, 2, 1, 3] |
| 1226 const unsigned Mask0213 = 0xd8; |
| 1227 Variable *T1 = makeReg(IceType_v4i32); |
| 1228 Variable *T2 = makeReg(IceType_v4i32); |
| 1229 Variable *T3 = makeReg(IceType_v4i32); |
| 1230 Variable *T4 = makeReg(IceType_v4i32); |
| 1231 _movp(T1, Src0); |
| 1232 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R |
| 1233 // with Src1 after stack operand alignment support is |
| 1234 // implemented. |
| 1235 Variable *Src0R = LEGAL_HACK(Src0); |
| 1236 Variable *Src1R = LEGAL_HACK(Src1); |
| 1237 _pshufd(T2, Src0R, Mask1030); |
| 1238 _pshufd(T3, Src1R, Mask1030); |
| 1239 _pmuludq(T1, Src1R); |
| 1240 _pmuludq(T2, T3); |
| 1241 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
| 1242 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
| 1243 _movp(Dest, T4); |
| 1244 } else if (Dest->getType() == IceType_v8i16) { |
| 1245 Variable *T = makeReg(IceType_v8i16); |
| 1246 _movp(T, Src0); |
| 1247 _pmullw(T, legalizeToVar(Src1)); |
| 1248 _movp(Dest, T); |
| 1249 } else { |
| 1250 assert(Dest->getType() == IceType_v16i8); |
| 1251 // Sz_mul_v16i8 |
| 1252 const IceString Helper = "Sz_mul_v16i8"; |
| 1253 const SizeT MaxSrcs = 2; |
| 1254 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1255 Call->addArg(Src0); |
| 1256 Call->addArg(Src1); |
| 1257 lowerCall(Call); |
| 1258 } |
| 1259 } break; |
| 1260 case InstArithmetic::Shl: { |
| 1261 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 |
| 1262 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); |
| 1263 const SizeT MaxSrcs = 2; |
| 1264 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1265 Call->addArg(Src0); |
| 1266 Call->addArg(Src1); |
| 1267 lowerCall(Call); |
| 1268 } break; |
| 1269 case InstArithmetic::Lshr: { |
| 1270 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 |
| 1271 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); |
| 1272 const SizeT MaxSrcs = 2; |
| 1273 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1274 Call->addArg(Src0); |
| 1275 Call->addArg(Src1); |
| 1276 lowerCall(Call); |
| 1277 } break; |
| 1278 case InstArithmetic::Ashr: { |
| 1279 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8 |
| 1280 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType()); |
| 1281 const SizeT MaxSrcs = 2; |
| 1282 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1283 Call->addArg(Src0); |
| 1284 Call->addArg(Src1); |
| 1285 lowerCall(Call); |
| 1286 } break; |
| 1287 case InstArithmetic::Udiv: { |
| 1288 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8 |
| 1289 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType()); |
| 1290 const SizeT MaxSrcs = 2; |
| 1291 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1292 Call->addArg(Src0); |
| 1293 Call->addArg(Src1); |
| 1294 lowerCall(Call); |
| 1295 } break; |
| 1296 case InstArithmetic::Sdiv: { |
| 1297 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8 |
| 1298 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType()); |
| 1299 const SizeT MaxSrcs = 2; |
| 1300 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1301 Call->addArg(Src0); |
| 1302 Call->addArg(Src1); |
| 1303 lowerCall(Call); |
| 1304 } break; |
| 1305 case InstArithmetic::Urem: { |
| 1306 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8 |
| 1307 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType()); |
| 1308 const SizeT MaxSrcs = 2; |
| 1309 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1310 Call->addArg(Src0); |
| 1311 Call->addArg(Src1); |
| 1312 lowerCall(Call); |
| 1313 } break; |
| 1314 case InstArithmetic::Srem: { |
| 1315 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8 |
| 1316 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType()); |
| 1317 const SizeT MaxSrcs = 2; |
| 1318 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); |
| 1319 Call->addArg(Src0); |
| 1320 Call->addArg(Src1); |
| 1321 lowerCall(Call); |
| 1322 } break; |
1162 case InstArithmetic::Fadd: { | 1323 case InstArithmetic::Fadd: { |
1163 Variable *T = makeReg(Dest->getType()); | 1324 Variable *T = makeReg(Dest->getType()); |
1164 _movp(T, Src0); | 1325 _movp(T, Src0); |
1165 _addps(T, Src1); | 1326 _addps(T, LEGAL_HACK(Src1)); |
1166 _movp(Dest, T); | 1327 _movp(Dest, T); |
1167 } break; | 1328 } break; |
1168 case InstArithmetic::Fsub: { | 1329 case InstArithmetic::Fsub: { |
1169 Variable *T = makeReg(Dest->getType()); | 1330 Variable *T = makeReg(Dest->getType()); |
1170 _movp(T, Src0); | 1331 _movp(T, Src0); |
1171 _subps(T, Src1); | 1332 _subps(T, LEGAL_HACK(Src1)); |
1172 _movp(Dest, T); | 1333 _movp(Dest, T); |
1173 } break; | 1334 } break; |
1174 case InstArithmetic::Fmul: { | 1335 case InstArithmetic::Fmul: { |
1175 Variable *T = makeReg(Dest->getType()); | 1336 Variable *T = makeReg(Dest->getType()); |
1176 _movp(T, Src0); | 1337 _movp(T, Src0); |
1177 _mulps(T, Src1); | 1338 _mulps(T, LEGAL_HACK(Src1)); |
1178 _movp(Dest, T); | 1339 _movp(Dest, T); |
1179 } break; | 1340 } break; |
1180 case InstArithmetic::Fdiv: { | 1341 case InstArithmetic::Fdiv: { |
1181 Variable *T = makeReg(Dest->getType()); | 1342 Variable *T = makeReg(Dest->getType()); |
1182 _movp(T, Src0); | 1343 _movp(T, Src0); |
1183 _divps(T, Src1); | 1344 _divps(T, LEGAL_HACK(Src1)); |
1184 _movp(Dest, T); | 1345 _movp(Dest, T); |
1185 } break; | 1346 } break; |
1186 case InstArithmetic::Frem: { | 1347 case InstArithmetic::Frem: { |
1187 const SizeT MaxSrcs = 2; | 1348 const SizeT MaxSrcs = 2; |
1188 InstCall *Call = makeHelperCall("__frem_v4f32", Dest, MaxSrcs); | 1349 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs); |
1189 Call->addArg(Src0); | 1350 Call->addArg(Src0); |
1190 Call->addArg(Src1); | 1351 Call->addArg(Src1); |
1191 lowerCall(Call); | 1352 lowerCall(Call); |
1192 } break; | 1353 } break; |
1193 } | 1354 } |
| 1355 #undef LEGAL_HACK |
1194 } else { // Dest->getType() is non-i64 scalar | 1356 } else { // Dest->getType() is non-i64 scalar |
1195 Variable *T_edx = NULL; | 1357 Variable *T_edx = NULL; |
1196 Variable *T = NULL; | 1358 Variable *T = NULL; |
1197 switch (Inst->getOp()) { | 1359 switch (Inst->getOp()) { |
1198 case InstArithmetic::_num: | 1360 case InstArithmetic::_num: |
1199 llvm_unreachable("Unknown arithmetic operator"); | 1361 llvm_unreachable("Unknown arithmetic operator"); |
1200 break; | 1362 break; |
1201 case InstArithmetic::Add: | 1363 case InstArithmetic::Add: |
1202 _mov(T, Src0); | 1364 _mov(T, Src0); |
1203 _add(T, Src1); | 1365 _add(T, Src1); |
(...skipping 2117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3321 for (SizeT i = 0; i < Size; ++i) { | 3483 for (SizeT i = 0; i < Size; ++i) { |
3322 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3484 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3323 } | 3485 } |
3324 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3486 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3325 } | 3487 } |
3326 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3488 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3327 << "\n"; | 3489 << "\n"; |
3328 } | 3490 } |
3329 | 3491 |
3330 } // end of namespace Ice | 3492 } // end of namespace Ice |
OLD | NEW |