Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1351133003: Optimize 64-bit shifts by constants for x86-32 (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1166 matching lines...) Expand 10 before | Expand all | Expand 10 after
1177 _mul(T_4Lo, T_3, Src1Lo); 1177 _mul(T_4Lo, T_3, Src1Lo);
1178 // The mul instruction produces two dest variables, edx:eax. We create a 1178 // The mul instruction produces two dest variables, edx:eax. We create a
1179 // fake definition of edx to account for this. 1179 // fake definition of edx to account for this.
1180 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); 1180 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1181 _mov(DestLo, T_4Lo); 1181 _mov(DestLo, T_4Lo);
1182 _add(T_4Hi, T_1); 1182 _add(T_4Hi, T_1);
1183 _add(T_4Hi, T_2); 1183 _add(T_4Hi, T_2);
1184 _mov(DestHi, T_4Hi); 1184 _mov(DestHi, T_4Hi);
1185 } break; 1185 } break;
1186 case InstArithmetic::Shl: { 1186 case InstArithmetic::Shl: {
1187 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
Jim Stichnoth 2015/09/22 05:48:46 Don't delete this TODO unless you're taking care of it.
sehr 2015/09/22 16:03:17 Done.
1188 // gcc does the following:
1189 // a=b<<c ==>
1190 // t1:ecx = c.lo & 0xff
1191 // t2 = b.lo
1192 // t3 = b.hi
1193 // t3 = shld t3, t2, t1
1194 // t2 = shl t2, t1
1195 // test t1, 0x20
1196 // je L1
1197 // use(t3)
1198 // t3 = t2
1199 // t2 = 0
1200 // L1:
1201 // a.lo = t2
1202 // a.hi = t3
1203 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1187 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1204 Constant *BitTest = Ctx->getConstantInt32(0x20);
1205 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1188 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1206 typename Traits::Insts::Label *Label = 1189 if (const auto *ConstantShiftAmount =
1207 Traits::Insts::Label::create(Func, this); 1190 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1208 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1191 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1209 _mov(T_2, Src0Lo); 1192 if (ShiftAmount > 32) {
1210 _mov(T_3, Src0Hi); 1193 // a=b<<c ==>
1211 _shld(T_3, T_2, T_1); 1194 // t2 = b.lo
1212 _shl(T_2, T_1); 1195 // t2 = shl t2, ShiftAmount-32
1213 _test(T_1, BitTest); 1196 // t3 = t2
1214 _br(Traits::Cond::Br_e, Label); 1197 // t2 = 0
1215 // T_2 and T_3 are being assigned again because of the intra-block 1198 _mov(T_2, Src0Lo);
1216 // control flow, so we need the _mov_nonkillable variant to avoid 1199 _shl(T_2, Ctx->getConstantInt32(ShiftAmount-32));
Jim Stichnoth 2015/09/22 05:48:46 Please run "make -f Makefile.standalone format" to fix the formatting.
sehr 2015/09/22 16:03:17 Done.
1217 // liveness problems. 1200 _mov(DestHi, T_2);
1218 _mov_nonkillable(T_3, T_2); 1201 _mov(DestLo, Zero);
1219 _mov_nonkillable(T_2, Zero); 1202 } else if (ShiftAmount == 32) {
1220 Context.insert(Label); 1203 // a=b<<c ==>
1221 _mov(DestLo, T_2); 1204 // t2 = b.lo
1222 _mov(DestHi, T_3); 1205 // a.hi = t2
1206 // a.lo = 0
1207 _mov(T_2, Src0Lo);
1208 _mov(DestHi, T_2);
1209 _mov(DestLo, Zero);
1210 } else {
1211 // a=b<<c ==>
1212 // t2 = b.lo
1213 // t3 = b.hi
1214 // t3 = shld t3, t2, ShiftAmount
1215 // t2 = shl t2, ShiftAmount
1216 // a.lo = t2
1217 // a.hi = t3
1218 _mov(T_2, Src0Lo);
1219 _mov(T_3, Src0Hi);
1220 _shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount));
1221 _shl(T_2, Ctx->getConstantInt32(ShiftAmount));
1222 // Move T_2 first to reduce register pressure.
1223 _mov(DestLo, T_2);
1224 _mov(DestHi, T_3);
1225 }
1226 } else {
1227 // a=b<<c ==>
1228 // t1:ecx = c.lo & 0xff
1229 // t2 = b.lo
1230 // t3 = b.hi
1231 // t3 = shld t3, t2, t1
1232 // t2 = shl t2, t1
1233 // test t1, 0x20
1234 // je L1
1235 // use(t3)
1236 // t3 = t2
1237 // t2 = 0
1238 // L1:
1239 // a.lo = t2
1240 // a.hi = t3
1241 Constant *BitTest = Ctx->getConstantInt32(0x20);
1242 typename Traits::Insts::Label *Label =
1243 Traits::Insts::Label::create(Func, this);
1244 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1245 _mov(T_2, Src0Lo);
1246 _mov(T_3, Src0Hi);
1247 _shld(T_3, T_2, T_1);
1248 _shl(T_2, T_1);
1249 _test(T_1, BitTest);
1250 _br(Traits::Cond::Br_e, Label);
1251 // T_2 and T_3 are being assigned again because of the intra-block
1252 // control flow, so we need the _mov_nonkillable variant to avoid
1253 // liveness problems.
1254 _mov_nonkillable(T_3, T_2);
1255 _mov_nonkillable(T_2, Zero);
1256 Context.insert(Label);
1257 _mov(DestLo, T_2);
1258 _mov(DestHi, T_3);
1259 }
1223 } break; 1260 } break;
1224 case InstArithmetic::Lshr: { 1261 case InstArithmetic::Lshr: {
1225 // a=b>>c (unsigned) ==>
1226 // t1:ecx = c.lo & 0xff
1227 // t2 = b.lo
1228 // t3 = b.hi
1229 // t2 = shrd t2, t3, t1
1230 // t3 = shr t3, t1
1231 // test t1, 0x20
1232 // je L1
1233 // use(t2)
1234 // t2 = t3
1235 // t3 = 0
1236 // L1:
1237 // a.lo = t2
1238 // a.hi = t3
1239 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1262 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1240 Constant *BitTest = Ctx->getConstantInt32(0x20);
1241 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1263 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1242 typename Traits::Insts::Label *Label = 1264 if (const auto *ConstantShiftAmount =
1243 Traits::Insts::Label::create(Func, this); 1265 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1244 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1266 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1245 _mov(T_2, Src0Lo); 1267 if (ShiftAmount > 32) {
1246 _mov(T_3, Src0Hi); 1268 // a=b>>c (unsigned) ==>
1247 _shrd(T_2, T_3, T_1); 1269 // t3 = b.hi
1248 _shr(T_3, T_1); 1270 // t3 = shr t3, ShiftAmount-32
1249 _test(T_1, BitTest); 1271 // a.lo = t3
1250 _br(Traits::Cond::Br_e, Label); 1272 // a.hi = 0
1251 // T_2 and T_3 are being assigned again because of the intra-block 1273 _mov(T_3, Src0Hi);
1252 // control flow, so we need the _mov_nonkillable variant to avoid 1274 _shr(T_3, Ctx->getConstantInt32(ShiftAmount-32));
1253 // liveness problems. 1275 _mov(DestLo, T_3);
1254 _mov_nonkillable(T_2, T_3); 1276 _mov(DestHi, Zero);
1255 _mov_nonkillable(T_3, Zero); 1277 } else if (ShiftAmount == 32) {
1256 Context.insert(Label); 1278 // a=b>>c (unsigned) ==>
1257 _mov(DestLo, T_2); 1279 // t3 = b.hi
1258 _mov(DestHi, T_3); 1280 // a.lo = t3
1281 // a.hi = 0
1282 _mov(T_3, Src0Hi);
1283 _mov(DestLo, T_3);
1284 _mov(DestHi, Zero);
1285 } else {
1286 // a=b>>c (unsigned) ==>
1287 // t2 = b.lo
1288 // t3 = b.hi
1289 // t2 = shrd t2, t3, ShiftAmount
1290 // t3 = shr t3, ShiftAmount
1291 // a.lo = t2
1292 // a.hi = t3
1293 _mov(T_2, Src0Lo);
1294 _mov(T_3, Src0Hi);
1295 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
1296 _shr(T_3, Ctx->getConstantInt32(ShiftAmount));
1297 // Move T_3 first to reduce register pressure.
1298 _mov(DestHi, T_3);
1299 _mov(DestLo, T_2);
1300 }
1301 } else {
1302 // a=b>>c (unsigned) ==>
1303 // t1:ecx = c.lo & 0xff
1304 // t2 = b.lo
1305 // t3 = b.hi
1306 // t2 = shrd t2, t3, t1
1307 // t3 = shr t3, t1
1308 // test t1, 0x20
1309 // je L1
1310 // use(t2)
1311 // t2 = t3
1312 // t3 = 0
1313 // L1:
1314 // a.lo = t2
1315 // a.hi = t3
1316 Constant *BitTest = Ctx->getConstantInt32(0x20);
1317 typename Traits::Insts::Label *Label =
1318 Traits::Insts::Label::create(Func, this);
1319 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1320 _mov(T_2, Src0Lo);
1321 _mov(T_3, Src0Hi);
1322 _shrd(T_2, T_3, T_1);
1323 _shr(T_3, T_1);
1324 _test(T_1, BitTest);
1325 _br(Traits::Cond::Br_e, Label);
1326 // T_2 and T_3 are being assigned again because of the intra-block
1327 // control flow, so we need the _mov_nonkillable variant to avoid
1328 // liveness problems.
1329 _mov_nonkillable(T_2, T_3);
1330 _mov_nonkillable(T_3, Zero);
1331 Context.insert(Label);
1332 _mov(DestLo, T_2);
1333 _mov(DestHi, T_3);
1334 }
1259 } break; 1335 } break;
1260 case InstArithmetic::Ashr: { 1336 case InstArithmetic::Ashr: {
1261 // a=b>>c (signed) ==>
1262 // t1:ecx = c.lo & 0xff
1263 // t2 = b.lo
1264 // t3 = b.hi
1265 // t2 = shrd t2, t3, t1
1266 // t3 = sar t3, t1
1267 // test t1, 0x20
1268 // je L1
1269 // use(t2)
1270 // t2 = t3
1271 // t3 = sar t3, 0x1f
1272 // L1:
1273 // a.lo = t2
1274 // a.hi = t3
1275 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1337 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1276 Constant *BitTest = Ctx->getConstantInt32(0x20); 1338 if (const auto *ConstantShiftAmount =
1277 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1339 llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1278 typename Traits::Insts::Label *Label = 1340 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1279 Traits::Insts::Label::create(Func, this); 1341 if (ShiftAmount > 32) {
1280 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1342 // a=b>>c (signed) ==>
1281 _mov(T_2, Src0Lo); 1343 // t2 = b.hi
1282 _mov(T_3, Src0Hi); 1344 // t3 = b.hi
1283 _shrd(T_2, T_3, T_1); 1345 // t3 = sar t3, 0x1f
1284 _sar(T_3, T_1); 1346 // t2 = shrd t2, t3, ShiftAmount-32
1285 _test(T_1, BitTest); 1347 // a.lo = t2
1286 _br(Traits::Cond::Br_e, Label); 1348 // a.hi = t3
1287 // T_2 and T_3 are being assigned again because of the intra-block 1349 _mov(T_2, Src0Hi);
1288 // control flow, so T_2 needs the _mov_nonkillable variant to avoid 1350 _mov(T_3, Src0Hi);
1289 // liveness problems. T_3 doesn't need special treatment because it is 1351 _sar(T_3, Ctx->getConstantInt32(0x1f));
1290 // reassigned via _sar instead of _mov. 1352 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount-32));
1291 _mov_nonkillable(T_2, T_3); 1353 _mov(DestLo, T_2);
1292 _sar(T_3, SignExtend); 1354 _mov(DestHi, T_3);
1293 Context.insert(Label); 1355 } else if (ShiftAmount == 32) {
1294 _mov(DestLo, T_2); 1356 // a=b>>c (signed) ==>
1295 _mov(DestHi, T_3); 1357 // t2 = b.hi
1358 // a.lo = t2
1359 // t3 = b.hi
1360 // t3 = sar t3, 0x1f
1361 // a.hi = t3
1362 _mov(T_2, Src0Hi);
1363 _mov(DestLo, T_2);
1364 _mov(T_3, Src0Hi);
1365 _sar(T_3, Ctx->getConstantInt32(0x1f));
1366 _mov(DestHi, T_3);
1367 } else {
1368 // a=b>>c (signed) ==>
1369 // t2 = b.lo
1370 // t3 = b.hi
1371 // t2 = shrd t2, t3, ShiftAmount
1372 // t3 = sar t3, ShiftAmount
1373 // a.lo = t2
1374 // a.hi = t3
1375 _mov(T_2, Src0Lo);
1376 _mov(T_3, Src0Hi);
1377 _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
1378 _sar(T_3, Ctx->getConstantInt32(ShiftAmount));
1379 _mov(DestLo, T_2);
1380 _mov(DestHi, T_3);
1381 }
1382 } else {
1383 // a=b>>c (signed) ==>
1384 // t1:ecx = c.lo & 0xff
1385 // t2 = b.lo
1386 // t3 = b.hi
1387 // t2 = shrd t2, t3, t1
1388 // t3 = sar t3, t1
1389 // test t1, 0x20
1390 // je L1
1391 // use(t2)
1392 // t2 = t3
1393 // t3 = sar t3, 0x1f
1394 // L1:
1395 // a.lo = t2
1396 // a.hi = t3
1397 Constant *BitTest = Ctx->getConstantInt32(0x20);
1398 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1399 typename Traits::Insts::Label *Label =
1400 Traits::Insts::Label::create(Func, this);
1401 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1402 _mov(T_2, Src0Lo);
1403 _mov(T_3, Src0Hi);
1404 _shrd(T_2, T_3, T_1);
1405 _sar(T_3, T_1);
1406 _test(T_1, BitTest);
1407 _br(Traits::Cond::Br_e, Label);
1408 // T_2 and T_3 are being assigned again because of the intra-block
1409 // control flow, so T_2 needs the _mov_nonkillable variant to avoid
1410 // liveness problems. T_3 doesn't need special treatment because it is
1411 // reassigned via _sar instead of _mov.
1412 _mov_nonkillable(T_2, T_3);
1413 _sar(T_3, SignExtend);
1414 Context.insert(Label);
1415 _mov(DestLo, T_2);
1416 _mov(DestHi, T_3);
1417 }
1296 } break; 1418 } break;
1297 case InstArithmetic::Fadd: 1419 case InstArithmetic::Fadd:
1298 case InstArithmetic::Fsub: 1420 case InstArithmetic::Fsub:
1299 case InstArithmetic::Fmul: 1421 case InstArithmetic::Fmul:
1300 case InstArithmetic::Fdiv: 1422 case InstArithmetic::Fdiv:
1301 case InstArithmetic::Frem: 1423 case InstArithmetic::Frem:
1302 llvm_unreachable("FP instruction with i64 type"); 1424 llvm_unreachable("FP instruction with i64 type");
1303 break; 1425 break;
1304 case InstArithmetic::Udiv: 1426 case InstArithmetic::Udiv:
1305 case InstArithmetic::Sdiv: 1427 case InstArithmetic::Sdiv:
(...skipping 4045 matching lines...) Expand 10 before | Expand all | Expand 10 after
5351 } 5473 }
5352 // the offset is not eligible for blinding or pooling, return the original 5474 // the offset is not eligible for blinding or pooling, return the original
5353 // mem operand 5475 // mem operand
5354 return MemOperand; 5476 return MemOperand;
5355 } 5477 }
5356 5478
5357 } // end of namespace X86Internal 5479 } // end of namespace X86Internal
5358 } // end of namespace Ice 5480 } // end of namespace Ice
5359 5481
5360 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5482 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698