Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(310)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1351133003: Optimize 64-bit shifts by constants for x86-32 (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review updates Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/shift.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1167 matching lines...) Expand 10 before | Expand all | Expand 10 after
1178 // The mul instruction produces two dest variables, edx:eax. We create a 1178 // The mul instruction produces two dest variables, edx:eax. We create a
1179 // fake definition of edx to account for this. 1179 // fake definition of edx to account for this.
1180 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); 1180 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1181 _mov(DestLo, T_4Lo); 1181 _mov(DestLo, T_4Lo);
1182 _add(T_4Hi, T_1); 1182 _add(T_4Hi, T_1);
1183 _add(T_4Hi, T_2); 1183 _add(T_4Hi, T_2);
1184 _mov(DestHi, T_4Hi); 1184 _mov(DestHi, T_4Hi);
1185 } break; 1185 } break;
1186 case InstArithmetic::Shl: { 1186 case InstArithmetic::Shl: {
1187 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. 1187 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1188 // gcc does the following:
1189 // a=b<<c ==>
1190 // t1:ecx = c.lo & 0xff
1191 // t2 = b.lo
1192 // t3 = b.hi
1193 // t3 = shld t3, t2, t1
1194 // t2 = shl t2, t1
1195 // test t1, 0x20
1196 // je L1
1197 // use(t3)
1198 // t3 = t2
1199 // t2 = 0
1200 // L1:
1201 // a.lo = t2
1202 // a.hi = t3
1203 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1188 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1204 Constant *BitTest = Ctx->getConstantInt32(0x20);
1205 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1189 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1206 typename Traits::Insts::Label *Label = 1190 if (const auto *ConstantShiftAmount =
1207 Traits::Insts::Label::create(Func, this); 1191 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1208 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1192 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1209 _mov(T_2, Src0Lo); 1193 if (ShiftAmount > 32) {
1210 _mov(T_3, Src0Hi); 1194 // a=b<<c ==>
1211 _shld(T_3, T_2, T_1); 1195 // t2 = b.lo
1212 _shl(T_2, T_1); 1196 // t2 = shl t2, ShiftAmount-32
1213 _test(T_1, BitTest); 1197 // a.hi = t2
1214 _br(Traits::Cond::Br_e, Label); 1198 // a.lo = 0
1215 // T_2 and T_3 are being assigned again because of the intra-block 1199 _mov(T_2, Src0Lo);
1216 // control flow, so we need the _mov_nonkillable variant to avoid 1200 _shl(T_2, Ctx->getConstantInt32(ShiftAmount - 32));
1217 // liveness problems. 1201 _mov(DestHi, T_2);
1218 _mov_nonkillable(T_3, T_2); 1202 _mov(DestLo, Zero);
1219 _mov_nonkillable(T_2, Zero); 1203 } else if (ShiftAmount == 32) {
1220 Context.insert(Label); 1204 // a=b<<c ==>
1221 _mov(DestLo, T_2); 1205 // t2 = b.lo
1222 _mov(DestHi, T_3); 1206 // a.hi = t2
1207 // a.lo = 0
1208 _mov(T_2, Src0Lo);
1209 _mov(DestHi, T_2);
1210 _mov(DestLo, Zero);
1211 } else {
1212 // a=b<<c ==>
1213 // t2 = b.lo
1214 // t3 = b.hi
1215 // t3 = shld t3, t2, ShiftAmount
1216 // t2 = shl t2, ShiftAmount
1217 // a.lo = t2
1218 // a.hi = t3
1219 _mov(T_2, Src0Lo);
1220 _mov(T_3, Src0Hi);
1221 _shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount));
1222 _shl(T_2, Ctx->getConstantInt32(ShiftAmount));
1223 // Move T_2 first to reduce register pressure.
1224 _mov(DestLo, T_2);
1225 _mov(DestHi, T_3);
1226 }
1227 } else {
1228 // a=b<<c ==>
1229 // t1:ecx = c.lo & 0xff
1230 // t2 = b.lo
1231 // t3 = b.hi
1232 // t3 = shld t3, t2, t1
1233 // t2 = shl t2, t1
1234 // test t1, 0x20
1235 // je L1
1236 // use(t3)
1237 // t3 = t2
1238 // t2 = 0
1239 // L1:
1240 // a.lo = t2
1241 // a.hi = t3
1242 Constant *BitTest = Ctx->getConstantInt32(0x20);
1243 typename Traits::Insts::Label *Label =
1244 Traits::Insts::Label::create(Func, this);
1245 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1246 _mov(T_2, Src0Lo);
1247 _mov(T_3, Src0Hi);
1248 _shld(T_3, T_2, T_1);
1249 _shl(T_2, T_1);
1250 _test(T_1, BitTest);
John 2015/09/22 16:18:37 would _cmp be safer here? (e.g., T_1 == 512)
1251 _br(Traits::Cond::Br_e, Label);
1252 // T_2 and T_3 are being assigned again because of the intra-block
1253 // control flow, so we need the _mov_nonkillable variant to avoid
1254 // liveness problems.
1255 _mov_nonkillable(T_3, T_2);
1256 _mov_nonkillable(T_2, Zero);
1257 Context.insert(Label);
1258 _mov(DestLo, T_2);
1259 _mov(DestHi, T_3);
1260 }
1223 } break; 1261 } break;
1224 case InstArithmetic::Lshr: { 1262 case InstArithmetic::Lshr: {
1225 // a=b>>c (unsigned) ==>
1226 // t1:ecx = c.lo & 0xff
1227 // t2 = b.lo
1228 // t3 = b.hi
1229 // t2 = shrd t2, t3, t1
1230 // t3 = shr t3, t1
1231 // test t1, 0x20
1232 // je L1
1233 // use(t2)
1234 // t2 = t3
1235 // t3 = 0
1236 // L1:
1237 // a.lo = t2
1238 // a.hi = t3
1239 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1263 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1240 Constant *BitTest = Ctx->getConstantInt32(0x20);
1241 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1264 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1242 typename Traits::Insts::Label *Label = 1265 if (const auto *ConstantShiftAmount =
1243 Traits::Insts::Label::create(Func, this); 1266 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1244 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1267 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1245 _mov(T_2, Src0Lo); 1268 if (ShiftAmount > 32) {
1246 _mov(T_3, Src0Hi); 1269 // a=b>>c (unsigned) ==>
1247 _shrd(T_2, T_3, T_1); 1270 // t3 = b.hi
1248 _shr(T_3, T_1); 1271 // t3 = shr t3, ShiftAmount-32
1249 _test(T_1, BitTest); 1272 // a.lo = t3
1250 _br(Traits::Cond::Br_e, Label); 1273 // a.hi = 0
1251 // T_2 and T_3 are being assigned again because of the intra-block 1274 _mov(T_3, Src0Hi);
1252 // control flow, so we need the _mov_nonkillable variant to avoid 1275 _shr(T_3, Ctx->getConstantInt32(ShiftAmount - 32));
1253 // liveness problems. 1276 _mov(DestLo, T_3);
1254 _mov_nonkillable(T_2, T_3); 1277 _mov(DestHi, Zero);
1255 _mov_nonkillable(T_3, Zero); 1278 } else if (ShiftAmount == 32) {
1256 Context.insert(Label); 1279 // a=b>>c (unsigned) ==>
1257 _mov(DestLo, T_2); 1280 // t3 = b.hi
1258 _mov(DestHi, T_3); 1281 // a.lo = t3
1282 // a.hi = 0
1283 _mov(T_3, Src0Hi);
1284 _mov(DestLo, T_3);
1285 _mov(DestHi, Zero);
1286 } else {
1287 // a=b>>c (unsigned) ==>
1288 // t2 = b.lo
1289 // t3 = b.hi
1290 // t2 = shrd t2, t3, ShiftAmount
1291 // t3 = shr t3, ShiftAmount
1292 // a.lo = t2
1293 // a.hi = t3
1294 _mov(T_2, Src0Lo);
1295 _mov(T_3, Src0Hi);
1296 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
1297 _shr(T_3, Ctx->getConstantInt32(ShiftAmount));
1298 // Move T_3 first to reduce register pressure.
1299 _mov(DestHi, T_3);
1300 _mov(DestLo, T_2);
1301 }
1302 } else {
1303 // a=b>>c (unsigned) ==>
1304 // t1:ecx = c.lo & 0xff
1305 // t2 = b.lo
1306 // t3 = b.hi
1307 // t2 = shrd t2, t3, t1
1308 // t3 = shr t3, t1
1309 // test t1, 0x20
1310 // je L1
1311 // use(t2)
1312 // t2 = t3
1313 // t3 = 0
1314 // L1:
1315 // a.lo = t2
1316 // a.hi = t3
1317 Constant *BitTest = Ctx->getConstantInt32(0x20);
1318 typename Traits::Insts::Label *Label =
1319 Traits::Insts::Label::create(Func, this);
1320 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1321 _mov(T_2, Src0Lo);
1322 _mov(T_3, Src0Hi);
1323 _shrd(T_2, T_3, T_1);
1324 _shr(T_3, T_1);
1325 _test(T_1, BitTest);
1326 _br(Traits::Cond::Br_e, Label);
1327 // T_2 and T_3 are being assigned again because of the intra-block
1328 // control flow, so we need the _mov_nonkillable variant to avoid
1329 // liveness problems.
1330 _mov_nonkillable(T_2, T_3);
1331 _mov_nonkillable(T_3, Zero);
1332 Context.insert(Label);
1333 _mov(DestLo, T_2);
1334 _mov(DestHi, T_3);
1335 }
1259 } break; 1336 } break;
1260 case InstArithmetic::Ashr: { 1337 case InstArithmetic::Ashr: {
1261 // a=b>>c (signed) ==>
1262 // t1:ecx = c.lo & 0xff
1263 // t2 = b.lo
1264 // t3 = b.hi
1265 // t2 = shrd t2, t3, t1
1266 // t3 = sar t3, t1
1267 // test t1, 0x20
1268 // je L1
1269 // use(t2)
1270 // t2 = t3
1271 // t3 = sar t3, 0x1f
1272 // L1:
1273 // a.lo = t2
1274 // a.hi = t3
1275 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1338 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1276 Constant *BitTest = Ctx->getConstantInt32(0x20); 1339 if (const auto *ConstantShiftAmount =
1277 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1340 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1278 typename Traits::Insts::Label *Label = 1341 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1279 Traits::Insts::Label::create(Func, this); 1342 if (ShiftAmount > 32) {
1280 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1343 // a=b>>c (signed) ==>
1281 _mov(T_2, Src0Lo); 1344 // t2 = b.hi
1282 _mov(T_3, Src0Hi); 1345 // t3 = b.hi
1283 _shrd(T_2, T_3, T_1); 1346 // t3 = sar t3, 0x1f
1284 _sar(T_3, T_1); 1347 // t2 = shrd t2, t3, ShiftAmount-32
1285 _test(T_1, BitTest); 1348 // a.lo = t2
1286 _br(Traits::Cond::Br_e, Label); 1349 // a.hi = t3
1287 // T_2 and T_3 are being assigned again because of the intra-block 1350 _mov(T_2, Src0Hi);
1288 // control flow, so T_2 needs the _mov_nonkillable variant to avoid 1351 _mov(T_3, Src0Hi);
1289 // liveness problems. T_3 doesn't need special treatment because it is 1352 _sar(T_3, Ctx->getConstantInt32(0x1f));
1290 // reassigned via _sar instead of _mov. 1353 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount - 32));
1291 _mov_nonkillable(T_2, T_3); 1354 _mov(DestLo, T_2);
1292 _sar(T_3, SignExtend); 1355 _mov(DestHi, T_3);
1293 Context.insert(Label); 1356 } else if (ShiftAmount == 32) {
1294 _mov(DestLo, T_2); 1357 // a=b>>c (signed) ==>
1295 _mov(DestHi, T_3); 1358 // t2 = b.hi
1359 // a.lo = t2
1360 // t3 = b.hi
1361 // t3 = sar t3, 0x1f
1362 // a.hi = t3
1363 _mov(T_2, Src0Hi);
1364 _mov(DestLo, T_2);
1365 _mov(T_3, Src0Hi);
1366 _sar(T_3, Ctx->getConstantInt32(0x1f));
1367 _mov(DestHi, T_3);
1368 } else {
1369 // a=b>>c (signed) ==>
1370 // t2 = b.lo
1371 // t3 = b.hi
1372 // t2 = shrd t2, t3, ShiftAmount
1373 // t3 = sar t3, ShiftAmount
1374 // a.lo = t2
1375 // a.hi = t3
1376 _mov(T_2, Src0Lo);
1377 _mov(T_3, Src0Hi);
1378 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
1379 _sar(T_3, Ctx->getConstantInt32(ShiftAmount));
1380 _mov(DestLo, T_2);
1381 _mov(DestHi, T_3);
1382 }
1383 } else {
1384 // a=b>>c (signed) ==>
1385 // t1:ecx = c.lo & 0xff
1386 // t2 = b.lo
1387 // t3 = b.hi
1388 // t2 = shrd t2, t3, t1
1389 // t3 = sar t3, t1
1390 // test t1, 0x20
1391 // je L1
1392 // use(t2)
1393 // t2 = t3
1394 // t3 = sar t3, 0x1f
1395 // L1:
1396 // a.lo = t2
1397 // a.hi = t3
1398 Constant *BitTest = Ctx->getConstantInt32(0x20);
1399 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1400 typename Traits::Insts::Label *Label =
1401 Traits::Insts::Label::create(Func, this);
1402 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1403 _mov(T_2, Src0Lo);
1404 _mov(T_3, Src0Hi);
1405 _shrd(T_2, T_3, T_1);
1406 _sar(T_3, T_1);
1407 _test(T_1, BitTest);
1408 _br(Traits::Cond::Br_e, Label);
1409 // T_2 and T_3 are being assigned again because of the intra-block
1410 // control flow, so T_2 needs the _mov_nonkillable variant to avoid
1411 // liveness problems. T_3 doesn't need special treatment because it is
1412 // reassigned via _sar instead of _mov.
1413 _mov_nonkillable(T_2, T_3);
1414 _sar(T_3, SignExtend);
1415 Context.insert(Label);
1416 _mov(DestLo, T_2);
1417 _mov(DestHi, T_3);
1418 }
1296 } break; 1419 } break;
1297 case InstArithmetic::Fadd: 1420 case InstArithmetic::Fadd:
1298 case InstArithmetic::Fsub: 1421 case InstArithmetic::Fsub:
1299 case InstArithmetic::Fmul: 1422 case InstArithmetic::Fmul:
1300 case InstArithmetic::Fdiv: 1423 case InstArithmetic::Fdiv:
1301 case InstArithmetic::Frem: 1424 case InstArithmetic::Frem:
1302 llvm_unreachable("FP instruction with i64 type"); 1425 llvm_unreachable("FP instruction with i64 type");
1303 break; 1426 break;
1304 case InstArithmetic::Udiv: 1427 case InstArithmetic::Udiv:
1305 case InstArithmetic::Sdiv: 1428 case InstArithmetic::Sdiv:
(...skipping 4045 matching lines...) Expand 10 before | Expand all | Expand 10 after
5351 } 5474 }
5352 // the offset is not eligible for blinding or pooling, return the original 5475 // the offset is not eligible for blinding or pooling, return the original
5353 // mem operand 5476 // mem operand
5354 return MemOperand; 5477 return MemOperand;
5355 } 5478 }
5356 5479
5357 } // end of namespace X86Internal 5480 } // end of namespace X86Internal
5358 } // end of namespace Ice 5481 } // end of namespace Ice
5359 5482
5360 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5483 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/shift.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698