Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1177 _mul(T_4Lo, T_3, Src1Lo); | 1177 _mul(T_4Lo, T_3, Src1Lo); |
| 1178 // The mul instruction produces two dest variables, edx:eax. We create a | 1178 // The mul instruction produces two dest variables, edx:eax. We create a |
| 1179 // fake definition of edx to account for this. | 1179 // fake definition of edx to account for this. |
| 1180 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | 1180 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); |
| 1181 _mov(DestLo, T_4Lo); | 1181 _mov(DestLo, T_4Lo); |
| 1182 _add(T_4Hi, T_1); | 1182 _add(T_4Hi, T_1); |
| 1183 _add(T_4Hi, T_2); | 1183 _add(T_4Hi, T_2); |
| 1184 _mov(DestHi, T_4Hi); | 1184 _mov(DestHi, T_4Hi); |
| 1185 } break; | 1185 } break; |
| 1186 case InstArithmetic::Shl: { | 1186 case InstArithmetic::Shl: { |
| 1187 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | |
|
Jim Stichnoth
2015/09/22 05:48:46
Don't delete this TODO unless you're taking care of it.
sehr
2015/09/22 16:03:17
Done.
| |
| 1188 // gcc does the following: | |
| 1189 // a=b<<c ==> | |
| 1190 // t1:ecx = c.lo & 0xff | |
| 1191 // t2 = b.lo | |
| 1192 // t3 = b.hi | |
| 1193 // t3 = shld t3, t2, t1 | |
| 1194 // t2 = shl t2, t1 | |
| 1195 // test t1, 0x20 | |
| 1196 // je L1 | |
| 1197 // use(t3) | |
| 1198 // t3 = t2 | |
| 1199 // t2 = 0 | |
| 1200 // L1: | |
| 1201 // a.lo = t2 | |
| 1202 // a.hi = t3 | |
| 1203 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1187 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1204 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1205 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1188 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1206 typename Traits::Insts::Label *Label = | 1189 if (const auto *ConstantShiftAmount = |
| 1207 Traits::Insts::Label::create(Func, this); | 1190 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| 1208 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1191 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| 1209 _mov(T_2, Src0Lo); | 1192 if (ShiftAmount > 32) { |
| 1210 _mov(T_3, Src0Hi); | 1193 // a=b<<c ==> |
| 1211 _shld(T_3, T_2, T_1); | 1194 // t2 = b.lo |
| 1212 _shl(T_2, T_1); | 1195 // t2 = shl t2, ShiftAmount-32 |
| 1213 _test(T_1, BitTest); | 1196 // a.hi = t2 |
| 1214 _br(Traits::Cond::Br_e, Label); | 1197 // a.lo = 0 |
| 1215 // T_2 and T_3 are being assigned again because of the intra-block | 1198 _mov(T_2, Src0Lo); |
| 1216 // control flow, so we need the _mov_nonkillable variant to avoid | 1199 _shl(T_2, Ctx->getConstantInt32(ShiftAmount-32)); |
|
Jim Stichnoth
2015/09/22 05:48:46
Please run "make -f Makefile.standalone format" to fix the formatting.
sehr
2015/09/22 16:03:17
Done.
| |
| 1217 // liveness problems. | 1200 _mov(DestHi, T_2); |
| 1218 _mov_nonkillable(T_3, T_2); | 1201 _mov(DestLo, Zero); |
| 1219 _mov_nonkillable(T_2, Zero); | 1202 } else if (ShiftAmount == 32) { |
| 1220 Context.insert(Label); | 1203 // a=b<<c ==> |
| 1221 _mov(DestLo, T_2); | 1204 // t2 = b.lo |
| 1222 _mov(DestHi, T_3); | 1205 // a.hi = t2 |
| 1206 // a.lo = 0 | |
| 1207 _mov(T_2, Src0Lo); | |
| 1208 _mov(DestHi, T_2); | |
| 1209 _mov(DestLo, Zero); | |
| 1210 } else { | |
| 1211 // a=b<<c ==> | |
| 1212 // t2 = b.lo | |
| 1213 // t3 = b.hi | |
| 1214 // t3 = shld t3, t2, ShiftAmount | |
| 1215 // t2 = shl t2, ShiftAmount | |
| 1216 // a.lo = t2 | |
| 1217 // a.hi = t3 | |
| 1218 _mov(T_2, Src0Lo); | |
| 1219 _mov(T_3, Src0Hi); | |
| 1220 _shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount)); | |
| 1221 _shl(T_2, Ctx->getConstantInt32(ShiftAmount)); | |
| 1222 // Move T_2 first to reduce register pressure. | |
| 1223 _mov(DestLo, T_2); | |
| 1224 _mov(DestHi, T_3); | |
| 1225 } | |
| 1226 } else { | |
| 1227 // a=b<<c ==> | |
| 1228 // t1:ecx = c.lo & 0xff | |
| 1229 // t2 = b.lo | |
| 1230 // t3 = b.hi | |
| 1231 // t3 = shld t3, t2, t1 | |
| 1232 // t2 = shl t2, t1 | |
| 1233 // test t1, 0x20 | |
| 1234 // je L1 | |
| 1235 // use(t3) | |
| 1236 // t3 = t2 | |
| 1237 // t2 = 0 | |
| 1238 // L1: | |
| 1239 // a.lo = t2 | |
| 1240 // a.hi = t3 | |
| 1241 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1242 typename Traits::Insts::Label *Label = | |
| 1243 Traits::Insts::Label::create(Func, this); | |
| 1244 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | |
| 1245 _mov(T_2, Src0Lo); | |
| 1246 _mov(T_3, Src0Hi); | |
| 1247 _shld(T_3, T_2, T_1); | |
| 1248 _shl(T_2, T_1); | |
| 1249 _test(T_1, BitTest); | |
| 1250 _br(Traits::Cond::Br_e, Label); | |
| 1251 // T_2 and T_3 are being assigned again because of the intra-block | |
| 1252 // control flow, so we need the _mov_nonkillable variant to avoid | |
| 1253 // liveness problems. | |
| 1254 _mov_nonkillable(T_3, T_2); | |
| 1255 _mov_nonkillable(T_2, Zero); | |
| 1256 Context.insert(Label); | |
| 1257 _mov(DestLo, T_2); | |
| 1258 _mov(DestHi, T_3); | |
| 1259 } | |
| 1223 } break; | 1260 } break; |
| 1224 case InstArithmetic::Lshr: { | 1261 case InstArithmetic::Lshr: { |
| 1225 // a=b>>c (unsigned) ==> | |
| 1226 // t1:ecx = c.lo & 0xff | |
| 1227 // t2 = b.lo | |
| 1228 // t3 = b.hi | |
| 1229 // t2 = shrd t2, t3, t1 | |
| 1230 // t3 = shr t3, t1 | |
| 1231 // test t1, 0x20 | |
| 1232 // je L1 | |
| 1233 // use(t2) | |
| 1234 // t2 = t3 | |
| 1235 // t3 = 0 | |
| 1236 // L1: | |
| 1237 // a.lo = t2 | |
| 1238 // a.hi = t3 | |
| 1239 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1262 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1240 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1241 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1263 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1242 typename Traits::Insts::Label *Label = | 1264 if (const auto *ConstantShiftAmount = |
| 1243 Traits::Insts::Label::create(Func, this); | 1265 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| 1244 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1266 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| 1245 _mov(T_2, Src0Lo); | 1267 if (ShiftAmount > 32) { |
| 1246 _mov(T_3, Src0Hi); | 1268 // a=b>>c (unsigned) ==> |
| 1247 _shrd(T_2, T_3, T_1); | 1269 // t3 = b.hi |
| 1248 _shr(T_3, T_1); | 1270 // t3 = shr t3, ShiftAmount-32 |
| 1249 _test(T_1, BitTest); | 1271 // a.lo = t3 |
| 1250 _br(Traits::Cond::Br_e, Label); | 1272 // a.hi = 0 |
| 1251 // T_2 and T_3 are being assigned again because of the intra-block | 1273 _mov(T_3, Src0Hi); |
| 1252 // control flow, so we need the _mov_nonkillable variant to avoid | 1274 _shr(T_3, Ctx->getConstantInt32(ShiftAmount-32)); |
| 1253 // liveness problems. | 1275 _mov(DestLo, T_3); |
| 1254 _mov_nonkillable(T_2, T_3); | 1276 _mov(DestHi, Zero); |
| 1255 _mov_nonkillable(T_3, Zero); | 1277 } else if (ShiftAmount == 32) { |
| 1256 Context.insert(Label); | 1278 // a=b>>c (unsigned) ==> |
| 1257 _mov(DestLo, T_2); | 1279 // t3 = b.hi |
| 1258 _mov(DestHi, T_3); | 1280 // a.lo = t3 |
| 1281 // a.hi = 0 | |
| 1282 _mov(T_3, Src0Hi); | |
| 1283 _mov(DestLo, T_3); | |
| 1284 _mov(DestHi, Zero); | |
| 1285 } else { | |
| 1286 // a=b>>c (unsigned) ==> | |
| 1287 // t2 = b.lo | |
| 1288 // t3 = b.hi | |
| 1289 // t2 = shrd t2, t3, ShiftAmount | |
| 1290 // t3 = shr t3, ShiftAmount | |
| 1291 // a.lo = t2 | |
| 1292 // a.hi = t3 | |
| 1293 _mov(T_2, Src0Lo); | |
| 1294 _mov(T_3, Src0Hi); | |
| 1295 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount)); | |
| 1296 _shr(T_3, Ctx->getConstantInt32(ShiftAmount)); | |
| 1297 // Move T_3 first to reduce register pressure. | |
| 1298 _mov(DestHi, T_3); | |
| 1299 _mov(DestLo, T_2); | |
| 1300 } | |
| 1301 } else { | |
| 1302 // a=b>>c (unsigned) ==> | |
| 1303 // t1:ecx = c.lo & 0xff | |
| 1304 // t2 = b.lo | |
| 1305 // t3 = b.hi | |
| 1306 // t2 = shrd t2, t3, t1 | |
| 1307 // t3 = shr t3, t1 | |
| 1308 // test t1, 0x20 | |
| 1309 // je L1 | |
| 1310 // use(t2) | |
| 1311 // t2 = t3 | |
| 1312 // t3 = 0 | |
| 1313 // L1: | |
| 1314 // a.lo = t2 | |
| 1315 // a.hi = t3 | |
| 1316 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1317 typename Traits::Insts::Label *Label = | |
| 1318 Traits::Insts::Label::create(Func, this); | |
| 1319 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | |
| 1320 _mov(T_2, Src0Lo); | |
| 1321 _mov(T_3, Src0Hi); | |
| 1322 _shrd(T_2, T_3, T_1); | |
| 1323 _shr(T_3, T_1); | |
| 1324 _test(T_1, BitTest); | |
| 1325 _br(Traits::Cond::Br_e, Label); | |
| 1326 // T_2 and T_3 are being assigned again because of the intra-block | |
| 1327 // control flow, so we need the _mov_nonkillable variant to avoid | |
| 1328 // liveness problems. | |
| 1329 _mov_nonkillable(T_2, T_3); | |
| 1330 _mov_nonkillable(T_3, Zero); | |
| 1331 Context.insert(Label); | |
| 1332 _mov(DestLo, T_2); | |
| 1333 _mov(DestHi, T_3); | |
| 1334 } | |
| 1259 } break; | 1335 } break; |
| 1260 case InstArithmetic::Ashr: { | 1336 case InstArithmetic::Ashr: { |
| 1261 // a=b>>c (signed) ==> | |
| 1262 // t1:ecx = c.lo & 0xff | |
| 1263 // t2 = b.lo | |
| 1264 // t3 = b.hi | |
| 1265 // t2 = shrd t2, t3, t1 | |
| 1266 // t3 = sar t3, t1 | |
| 1267 // test t1, 0x20 | |
| 1268 // je L1 | |
| 1269 // use(t2) | |
| 1270 // t2 = t3 | |
| 1271 // t3 = sar t3, 0x1f | |
| 1272 // L1: | |
| 1273 // a.lo = t2 | |
| 1274 // a.hi = t3 | |
| 1275 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1337 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1276 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1338 if (const auto *ConstantShiftAmount = |
| 1277 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1339 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| 1278 typename Traits::Insts::Label *Label = | 1340 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| 1279 Traits::Insts::Label::create(Func, this); | 1341 if (ShiftAmount > 32) { |
| 1280 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1342 // a=b>>c (signed) ==> |
| 1281 _mov(T_2, Src0Lo); | 1343 // t2 = b.hi |
| 1282 _mov(T_3, Src0Hi); | 1344 // t3 = b.hi |
| 1283 _shrd(T_2, T_3, T_1); | 1345 // t3 = sar t3, 0x1f |
| 1284 _sar(T_3, T_1); | 1346 // t2 = shrd t2, t3, ShiftAmount-32 |
| 1285 _test(T_1, BitTest); | 1347 // a.lo = t2 |
| 1286 _br(Traits::Cond::Br_e, Label); | 1348 // a.hi = t3 |
| 1287 // T_2 and T_3 are being assigned again because of the intra-block | 1349 _mov(T_2, Src0Hi); |
| 1288 // control flow, so T_2 needs the _mov_nonkillable variant to avoid | 1350 _mov(T_3, Src0Hi); |
| 1289 // liveness problems. T_3 doesn't need special treatment because it is | 1351 _sar(T_3, Ctx->getConstantInt32(0x1f)); |
| 1290 // reassigned via _sar instead of _mov. | 1352 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount-32)); |
| 1291 _mov_nonkillable(T_2, T_3); | 1353 _mov(DestLo, T_2); |
| 1292 _sar(T_3, SignExtend); | 1354 _mov(DestHi, T_3); |
| 1293 Context.insert(Label); | 1355 } else if (ShiftAmount == 32) { |
| 1294 _mov(DestLo, T_2); | 1356 // a=b>>c (signed) ==> |
| 1295 _mov(DestHi, T_3); | 1357 // t2 = b.hi |
| 1358 // a.lo = t2 | |
| 1359 // t3 = b.hi | |
| 1360 // t3 = sar t3, 0x1f | |
| 1361 // a.hi = t3 | |
| 1362 _mov(T_2, Src0Hi); | |
| 1363 _mov(DestLo, T_2); | |
| 1364 _mov(T_3, Src0Hi); | |
| 1365 _sar(T_3, Ctx->getConstantInt32(0x1f)); | |
| 1366 _mov(DestHi, T_3); | |
| 1367 } else { | |
| 1368 // a=b>>c (signed) ==> | |
| 1369 // t2 = b.lo | |
| 1370 // t3 = b.hi | |
| 1371 // t2 = shrd t2, t3, ShiftAmount | |
| 1372 // t3 = sar t3, ShiftAmount | |
| 1373 // a.lo = t2 | |
| 1374 // a.hi = t3 | |
| 1375 _mov(T_2, Src0Lo); | |
| 1376 _mov(T_3, Src0Hi); | |
| 1377 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount)); | |
| 1378 _sar(T_3, Ctx->getConstantInt32(ShiftAmount)); | |
| 1379 _mov(DestLo, T_2); | |
| 1380 _mov(DestHi, T_3); | |
| 1381 } | |
| 1382 } else { | |
| 1383 // a=b>>c (signed) ==> | |
| 1384 // t1:ecx = c.lo & 0xff | |
| 1385 // t2 = b.lo | |
| 1386 // t3 = b.hi | |
| 1387 // t2 = shrd t2, t3, t1 | |
| 1388 // t3 = sar t3, t1 | |
| 1389 // test t1, 0x20 | |
| 1390 // je L1 | |
| 1391 // use(t2) | |
| 1392 // t2 = t3 | |
| 1393 // t3 = sar t3, 0x1f | |
| 1394 // L1: | |
| 1395 // a.lo = t2 | |
| 1396 // a.hi = t3 | |
| 1397 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1398 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | |
| 1399 typename Traits::Insts::Label *Label = | |
| 1400 Traits::Insts::Label::create(Func, this); | |
| 1401 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | |
| 1402 _mov(T_2, Src0Lo); | |
| 1403 _mov(T_3, Src0Hi); | |
| 1404 _shrd(T_2, T_3, T_1); | |
| 1405 _sar(T_3, T_1); | |
| 1406 _test(T_1, BitTest); | |
| 1407 _br(Traits::Cond::Br_e, Label); | |
| 1408 // T_2 and T_3 are being assigned again because of the intra-block | |
| 1409 // control flow, so T_2 needs the _mov_nonkillable variant to avoid | |
| 1410 // liveness problems. T_3 doesn't need special treatment because it is | |
| 1411 // reassigned via _sar instead of _mov. | |
| 1412 _mov_nonkillable(T_2, T_3); | |
| 1413 _sar(T_3, SignExtend); | |
| 1414 Context.insert(Label); | |
| 1415 _mov(DestLo, T_2); | |
| 1416 _mov(DestHi, T_3); | |
| 1417 } | |
| 1296 } break; | 1418 } break; |
| 1297 case InstArithmetic::Fadd: | 1419 case InstArithmetic::Fadd: |
| 1298 case InstArithmetic::Fsub: | 1420 case InstArithmetic::Fsub: |
| 1299 case InstArithmetic::Fmul: | 1421 case InstArithmetic::Fmul: |
| 1300 case InstArithmetic::Fdiv: | 1422 case InstArithmetic::Fdiv: |
| 1301 case InstArithmetic::Frem: | 1423 case InstArithmetic::Frem: |
| 1302 llvm_unreachable("FP instruction with i64 type"); | 1424 llvm_unreachable("FP instruction with i64 type"); |
| 1303 break; | 1425 break; |
| 1304 case InstArithmetic::Udiv: | 1426 case InstArithmetic::Udiv: |
| 1305 case InstArithmetic::Sdiv: | 1427 case InstArithmetic::Sdiv: |
| (...skipping 4045 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5351 } | 5473 } |
| 5352 // the offset is not eligible for blinding or pooling, return the original | 5474 // the offset is not eligible for blinding or pooling, return the original |
| 5353 // mem operand | 5475 // mem operand |
| 5354 return MemOperand; | 5476 return MemOperand; |
| 5355 } | 5477 } |
| 5356 | 5478 |
| 5357 } // end of namespace X86Internal | 5479 } // end of namespace X86Internal |
| 5358 } // end of namespace Ice | 5480 } // end of namespace Ice |
| 5359 | 5481 |
| 5360 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5482 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |