Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 443203003: Subzero: Use scalar arithmetic when no vector instruction exists. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Minor fixes: whitespace, formatting, variable names, etc. Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 1278 matching lines...) Expand 10 before | Expand all | Expand 10 after
1289 Variable *Src1R = LEGAL_HACK(Src1); 1289 Variable *Src1R = LEGAL_HACK(Src1);
1290 _pshufd(T2, Src0R, Mask1030); 1290 _pshufd(T2, Src0R, Mask1030);
1291 _pshufd(T3, Src1R, Mask1030); 1291 _pshufd(T3, Src1R, Mask1030);
1292 _pmuludq(T1, Src1R); 1292 _pmuludq(T1, Src1R);
1293 _pmuludq(T2, T3); 1293 _pmuludq(T2, T3);
1294 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); 1294 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
1295 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); 1295 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
1296 _movp(Dest, T4); 1296 _movp(Dest, T4);
1297 } else { 1297 } else {
1298 assert(Dest->getType() == IceType_v16i8); 1298 assert(Dest->getType() == IceType_v16i8);
1299 // Sz_mul_v16i8 1299 scalarizeArithmetic(InstArithmetic::Mul, Dest, Src0, Src1);
Jim Stichnoth 2014/08/07 19:58:10 Consider using Inst->getOp() instead of ::Mul?
wala 2014/08/07 20:30:35 Done.
1300 const IceString Helper = "Sz_mul_v16i8";
1301 const SizeT MaxSrcs = 2;
1302 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1303 Call->addArg(Src0);
1304 Call->addArg(Src1);
1305 lowerCall(Call);
1306 } 1300 }
1307 } break; 1301 } break;
1308 case InstArithmetic::Shl: { 1302 case InstArithmetic::Shl:
1309 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 1303 case InstArithmetic::Lshr:
1310 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); 1304 case InstArithmetic::Ashr:
1311 const SizeT MaxSrcs = 2; 1305 case InstArithmetic::Udiv:
1312 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); 1306 case InstArithmetic::Urem:
1313 Call->addArg(Src0); 1307 case InstArithmetic::Sdiv:
1314 Call->addArg(Src1); 1308 case InstArithmetic::Srem:
1315 lowerCall(Call); 1309 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1316 } break; 1310 break;
1317 case InstArithmetic::Lshr: {
1318 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
1319 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
1320 const SizeT MaxSrcs = 2;
1321 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1322 Call->addArg(Src0);
1323 Call->addArg(Src1);
1324 lowerCall(Call);
1325 } break;
1326 case InstArithmetic::Ashr: {
1327 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
1328 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
1329 const SizeT MaxSrcs = 2;
1330 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1331 Call->addArg(Src0);
1332 Call->addArg(Src1);
1333 lowerCall(Call);
1334 } break;
1335 case InstArithmetic::Udiv: {
1336 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
1337 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
1338 const SizeT MaxSrcs = 2;
1339 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1340 Call->addArg(Src0);
1341 Call->addArg(Src1);
1342 lowerCall(Call);
1343 } break;
1344 case InstArithmetic::Sdiv: {
1345 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
1346 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
1347 const SizeT MaxSrcs = 2;
1348 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1349 Call->addArg(Src0);
1350 Call->addArg(Src1);
1351 lowerCall(Call);
1352 } break;
1353 case InstArithmetic::Urem: {
1354 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
1355 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
1356 const SizeT MaxSrcs = 2;
1357 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1358 Call->addArg(Src0);
1359 Call->addArg(Src1);
1360 lowerCall(Call);
1361 } break;
1362 case InstArithmetic::Srem: {
1363 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
1364 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
1365 const SizeT MaxSrcs = 2;
1366 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
1367 Call->addArg(Src0);
1368 Call->addArg(Src1);
1369 lowerCall(Call);
1370 } break;
1371 case InstArithmetic::Fadd: { 1311 case InstArithmetic::Fadd: {
1372 Variable *T = makeReg(Dest->getType()); 1312 Variable *T = makeReg(Dest->getType());
1373 _movp(T, Src0); 1313 _movp(T, Src0);
1374 _addps(T, LEGAL_HACK(Src1)); 1314 _addps(T, LEGAL_HACK(Src1));
1375 _movp(Dest, T); 1315 _movp(Dest, T);
1376 } break; 1316 } break;
1377 case InstArithmetic::Fsub: { 1317 case InstArithmetic::Fsub: {
1378 Variable *T = makeReg(Dest->getType()); 1318 Variable *T = makeReg(Dest->getType());
1379 _movp(T, Src0); 1319 _movp(T, Src0);
1380 _subps(T, LEGAL_HACK(Src1)); 1320 _subps(T, LEGAL_HACK(Src1));
1381 _movp(Dest, T); 1321 _movp(Dest, T);
1382 } break; 1322 } break;
1383 case InstArithmetic::Fmul: { 1323 case InstArithmetic::Fmul: {
1384 Variable *T = makeReg(Dest->getType()); 1324 Variable *T = makeReg(Dest->getType());
1385 _movp(T, Src0); 1325 _movp(T, Src0);
1386 _mulps(T, LEGAL_HACK(Src1)); 1326 _mulps(T, LEGAL_HACK(Src1));
1387 _movp(Dest, T); 1327 _movp(Dest, T);
1388 } break; 1328 } break;
1389 case InstArithmetic::Fdiv: { 1329 case InstArithmetic::Fdiv: {
1390 Variable *T = makeReg(Dest->getType()); 1330 Variable *T = makeReg(Dest->getType());
1391 _movp(T, Src0); 1331 _movp(T, Src0);
1392 _divps(T, LEGAL_HACK(Src1)); 1332 _divps(T, LEGAL_HACK(Src1));
1393 _movp(Dest, T); 1333 _movp(Dest, T);
1394 } break; 1334 } break;
1395 case InstArithmetic::Frem: { 1335 case InstArithmetic::Frem:
1396 const SizeT MaxSrcs = 2; 1336 scalarizeArithmetic(InstArithmetic::Frem, Dest, Src0, Src1);
Jim Stichnoth 2014/08/07 19:58:10 Consider using Inst->getOp() instead of ::Frem?
wala 2014/08/07 20:30:35 Done.
1397 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs); 1337 break;
1398 Call->addArg(Src0);
1399 Call->addArg(Src1);
1400 lowerCall(Call);
1401 } break;
1402 } 1338 }
1403 #undef LEGAL_HACK 1339 #undef LEGAL_HACK
1404 } else { // Dest->getType() is non-i64 scalar 1340 } else { // Dest->getType() is non-i64 scalar
1405 Variable *T_edx = NULL; 1341 Variable *T_edx = NULL;
1406 Variable *T = NULL; 1342 Variable *T = NULL;
1407 switch (Inst->getOp()) { 1343 switch (Inst->getOp()) {
1408 case InstArithmetic::_num: 1344 case InstArithmetic::_num:
1409 llvm_unreachable("Unknown arithmetic operator"); 1345 llvm_unreachable("Unknown arithmetic operator");
1410 break; 1346 break;
1411 case InstArithmetic::Add: 1347 case InstArithmetic::Add:
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
1483 } else { 1419 } else {
1484 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1420 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1485 _mov(T, Src0, Reg_eax); 1421 _mov(T, Src0, Reg_eax);
1486 _mov(T_edx, Zero, Reg_edx); 1422 _mov(T_edx, Zero, Reg_edx);
1487 _div(T, Src1, T_edx); 1423 _div(T, Src1, T_edx);
1488 _mov(Dest, T); 1424 _mov(Dest, T);
1489 } 1425 }
1490 break; 1426 break;
1491 case InstArithmetic::Sdiv: 1427 case InstArithmetic::Sdiv:
1492 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1428 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1493 T_edx = makeReg(IceType_i32, Reg_edx); 1429 if (Dest->getType() == IceType_i8) {
1494 _mov(T, Src0, Reg_eax); 1430 _mov(T, Src0, Reg_eax);
1495 _cdq(T_edx, T); 1431 _cbwdq(T, T);
1496 _idiv(T, Src1, T_edx); 1432 _idiv(T, Src1, T);
1497 _mov(Dest, T); 1433 _mov(Dest, T);
1434 } else {
1435 T_edx = makeReg(IceType_i32, Reg_edx);
1436 _mov(T, Src0, Reg_eax);
1437 _cbwdq(T_edx, T);
1438 _idiv(T, Src1, T_edx);
1439 _mov(Dest, T);
1440 }
1498 break; 1441 break;
1499 case InstArithmetic::Urem: 1442 case InstArithmetic::Urem:
1500 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1443 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1501 if (Dest->getType() == IceType_i8) { 1444 if (Dest->getType() == IceType_i8) {
1502 Variable *T_ah = NULL; 1445 Variable *T_ah = NULL;
1503 Constant *Zero = Ctx->getConstantZero(IceType_i8); 1446 Constant *Zero = Ctx->getConstantZero(IceType_i8);
1504 _mov(T, Src0, Reg_eax); 1447 _mov(T, Src0, Reg_eax);
1505 _mov(T_ah, Zero, Reg_ah); 1448 _mov(T_ah, Zero, Reg_ah);
1506 _div(T_ah, Src1, T); 1449 _div(T_ah, Src1, T);
1507 _mov(Dest, T_ah); 1450 _mov(Dest, T_ah);
1508 } else { 1451 } else {
1509 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1452 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1510 _mov(T_edx, Zero, Reg_edx); 1453 _mov(T_edx, Zero, Reg_edx);
1511 _mov(T, Src0, Reg_eax); 1454 _mov(T, Src0, Reg_eax);
1512 _div(T_edx, Src1, T); 1455 _div(T_edx, Src1, T);
1513 _mov(Dest, T_edx); 1456 _mov(Dest, T_edx);
1514 } 1457 }
1515 break; 1458 break;
1516 case InstArithmetic::Srem: 1459 case InstArithmetic::Srem:
1517 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1460 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1518 T_edx = makeReg(IceType_i32, Reg_edx); 1461 if (Dest->getType() == IceType_i8) {
1519 _mov(T, Src0, Reg_eax); 1462 Variable *T_ah = makeReg(IceType_i8, Reg_ah);
1520 _cdq(T_edx, T); 1463 _mov(T, Src0, Reg_eax);
1521 _idiv(T_edx, Src1, T); 1464 _cbwdq(T, T);
1522 _mov(Dest, T_edx); 1465 Context.insert(InstFakeDef::create(Func, T_ah));
1466 _idiv(T_ah, Src1, T);
1467 _mov(Dest, T_ah);
1468 } else {
1469 T_edx = makeReg(IceType_i32, Reg_edx);
1470 _mov(T, Src0, Reg_eax);
1471 _cbwdq(T_edx, T);
1472 _idiv(T_edx, Src1, T);
1473 _mov(Dest, T_edx);
1474 }
1523 break; 1475 break;
1524 case InstArithmetic::Fadd: 1476 case InstArithmetic::Fadd:
1525 _mov(T, Src0); 1477 _mov(T, Src0);
1526 _addss(T, Src1); 1478 _addss(T, Src1);
1527 _mov(Dest, T); 1479 _mov(Dest, T);
1528 break; 1480 break;
1529 case InstArithmetic::Fsub: 1481 case InstArithmetic::Fsub:
1530 _mov(T, Src0); 1482 _mov(T, Src0);
1531 _subss(T, Src1); 1483 _subss(T, Src1);
1532 _mov(Dest, T); 1484 _mov(Dest, T);
(...skipping 2204 matching lines...) Expand 10 before | Expand all | Expand 10 after
3737 Src0 = legalize(Src0, Legal_All, true); 3689 Src0 = legalize(Src0, Legal_All, true);
3738 for (SizeT I = 0; I < NumCases; ++I) { 3690 for (SizeT I = 0; I < NumCases; ++I) {
3739 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); 3691 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
3740 _cmp(Src0, Value); 3692 _cmp(Src0, Value);
3741 _br(InstX8632Br::Br_e, Inst->getLabel(I)); 3693 _br(InstX8632Br::Br_e, Inst->getLabel(I));
3742 } 3694 }
3743 3695
3744 _br(Inst->getLabelDefault()); 3696 _br(Inst->getLabelDefault());
3745 } 3697 }
3746 3698
3699 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Jim Stichnoth 2014/08/07 19:58:10 Name the arg "Kind", or something else more descriptive.
wala 2014/08/07 20:30:35 Done.
3700 Operand *Src0, Operand *Src1) {
3701 assert(isVectorType(Dest->getType()));
3702 Type Ty = Dest->getType();
3703 Type ElementTy = typeElementType(Ty);
3704 SizeT NumElements = typeNumElements(Ty);
3705
3706 Operand *T = Ctx->getConstantUndef(Ty);
3707 for (SizeT I = 0; I < NumElements; ++I) {
3708 Constant *Index = Ctx->getConstantInt(IceType_i32, I);
3709
3710 // Extract the next two inputs.
3711 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode());
3712 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
3713 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode());
3714 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
3715
3716 // Perform the arithmetic as a scalar operation.
3717 Variable *Res = Func->makeVariable(ElementTy, Context.getNode());
3718 lowerArithmetic(InstArithmetic::create(Func, K, Res, Op0, Op1));
3719
3720 // Insert the result into position.
3721 Variable *DestT = Func->makeVariable(Ty, Context.getNode());
3722 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
3723 T = DestT;
3724 // TODO: Use postLower() in -Om1 mode to avoid buildup of infinite
3725 // weight temporaries.
3726 }
3727
3728 lowerAssign(InstAssign::create(Func, Dest, T));
3729 }
3730
3747 // The following pattern occurs often in lowered C and C++ code: 3731 // The following pattern occurs often in lowered C and C++ code:
3748 // 3732 //
3749 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 3733 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1
3750 // %cmp.ext = sext <n x i1> %cmp to <n x ty> 3734 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
3751 // 3735 //
3752 // We can eliminate the sext operation by copying the result of pcmpeqd, 3736 // We can eliminate the sext operation by copying the result of pcmpeqd,
3753 // pcmpgtd, or cmpps (which produce sign extended results) to the result 3737 // pcmpgtd, or cmpps (which produce sign extended results) to the result
3754 // of the sext operation. 3738 // of the sext operation.
3755 void 3739 void
3756 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { 3740 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) {
(...skipping 406 matching lines...) Expand 10 before | Expand all | Expand 10 after
4163 for (SizeT i = 0; i < Size; ++i) { 4147 for (SizeT i = 0; i < Size; ++i) {
4164 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4148 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4165 } 4149 }
4166 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4150 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4167 } 4151 }
4168 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4152 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
4169 << "\n"; 4153 << "\n";
4170 } 4154 }
4171 4155
4172 } // end of namespace Ice 4156 } // end of namespace Ice
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698