Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 1278 matching lines...) | |
| 1289 Variable *Src1R = LEGAL_HACK(Src1); | 1289 Variable *Src1R = LEGAL_HACK(Src1); |
| 1290 _pshufd(T2, Src0R, Mask1030); | 1290 _pshufd(T2, Src0R, Mask1030); |
| 1291 _pshufd(T3, Src1R, Mask1030); | 1291 _pshufd(T3, Src1R, Mask1030); |
| 1292 _pmuludq(T1, Src1R); | 1292 _pmuludq(T1, Src1R); |
| 1293 _pmuludq(T2, T3); | 1293 _pmuludq(T2, T3); |
| 1294 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); | 1294 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
| 1295 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); | 1295 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
| 1296 _movp(Dest, T4); | 1296 _movp(Dest, T4); |
| 1297 } else { | 1297 } else { |
| 1298 assert(Dest->getType() == IceType_v16i8); | 1298 assert(Dest->getType() == IceType_v16i8); |
| 1299 // Sz_mul_v16i8 | 1299 scalarizeArithmetic(InstArithmetic::Mul, Dest, Src0, Src1); |
|
Jim Stichnoth
2014/08/07 19:58:10
Consider using Inst->getOp() instead of ::Mul?
wala
2014/08/07 20:30:35
Done.
| |
| 1300 const IceString Helper = "Sz_mul_v16i8"; | |
| 1301 const SizeT MaxSrcs = 2; | |
| 1302 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1303 Call->addArg(Src0); | |
| 1304 Call->addArg(Src1); | |
| 1305 lowerCall(Call); | |
| 1306 } | 1300 } |
| 1307 } break; | 1301 } break; |
| 1308 case InstArithmetic::Shl: { | 1302 case InstArithmetic::Shl: |
| 1309 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 | 1303 case InstArithmetic::Lshr: |
| 1310 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); | 1304 case InstArithmetic::Ashr: |
| 1311 const SizeT MaxSrcs = 2; | 1305 case InstArithmetic::Udiv: |
| 1312 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | 1306 case InstArithmetic::Urem: |
| 1313 Call->addArg(Src0); | 1307 case InstArithmetic::Sdiv: |
| 1314 Call->addArg(Src1); | 1308 case InstArithmetic::Srem: |
| 1315 lowerCall(Call); | 1309 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1316 } break; | 1310 break; |
| 1317 case InstArithmetic::Lshr: { | |
| 1318 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 | |
| 1319 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); | |
| 1320 const SizeT MaxSrcs = 2; | |
| 1321 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1322 Call->addArg(Src0); | |
| 1323 Call->addArg(Src1); | |
| 1324 lowerCall(Call); | |
| 1325 } break; | |
| 1326 case InstArithmetic::Ashr: { | |
| 1327 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8 | |
| 1328 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType()); | |
| 1329 const SizeT MaxSrcs = 2; | |
| 1330 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1331 Call->addArg(Src0); | |
| 1332 Call->addArg(Src1); | |
| 1333 lowerCall(Call); | |
| 1334 } break; | |
| 1335 case InstArithmetic::Udiv: { | |
| 1336 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8 | |
| 1337 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType()); | |
| 1338 const SizeT MaxSrcs = 2; | |
| 1339 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1340 Call->addArg(Src0); | |
| 1341 Call->addArg(Src1); | |
| 1342 lowerCall(Call); | |
| 1343 } break; | |
| 1344 case InstArithmetic::Sdiv: { | |
| 1345 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8 | |
| 1346 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType()); | |
| 1347 const SizeT MaxSrcs = 2; | |
| 1348 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1349 Call->addArg(Src0); | |
| 1350 Call->addArg(Src1); | |
| 1351 lowerCall(Call); | |
| 1352 } break; | |
| 1353 case InstArithmetic::Urem: { | |
| 1354 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8 | |
| 1355 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType()); | |
| 1356 const SizeT MaxSrcs = 2; | |
| 1357 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1358 Call->addArg(Src0); | |
| 1359 Call->addArg(Src1); | |
| 1360 lowerCall(Call); | |
| 1361 } break; | |
| 1362 case InstArithmetic::Srem: { | |
| 1363 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8 | |
| 1364 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType()); | |
| 1365 const SizeT MaxSrcs = 2; | |
| 1366 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
| 1367 Call->addArg(Src0); | |
| 1368 Call->addArg(Src1); | |
| 1369 lowerCall(Call); | |
| 1370 } break; | |
| 1371 case InstArithmetic::Fadd: { | 1311 case InstArithmetic::Fadd: { |
| 1372 Variable *T = makeReg(Dest->getType()); | 1312 Variable *T = makeReg(Dest->getType()); |
| 1373 _movp(T, Src0); | 1313 _movp(T, Src0); |
| 1374 _addps(T, LEGAL_HACK(Src1)); | 1314 _addps(T, LEGAL_HACK(Src1)); |
| 1375 _movp(Dest, T); | 1315 _movp(Dest, T); |
| 1376 } break; | 1316 } break; |
| 1377 case InstArithmetic::Fsub: { | 1317 case InstArithmetic::Fsub: { |
| 1378 Variable *T = makeReg(Dest->getType()); | 1318 Variable *T = makeReg(Dest->getType()); |
| 1379 _movp(T, Src0); | 1319 _movp(T, Src0); |
| 1380 _subps(T, LEGAL_HACK(Src1)); | 1320 _subps(T, LEGAL_HACK(Src1)); |
| 1381 _movp(Dest, T); | 1321 _movp(Dest, T); |
| 1382 } break; | 1322 } break; |
| 1383 case InstArithmetic::Fmul: { | 1323 case InstArithmetic::Fmul: { |
| 1384 Variable *T = makeReg(Dest->getType()); | 1324 Variable *T = makeReg(Dest->getType()); |
| 1385 _movp(T, Src0); | 1325 _movp(T, Src0); |
| 1386 _mulps(T, LEGAL_HACK(Src1)); | 1326 _mulps(T, LEGAL_HACK(Src1)); |
| 1387 _movp(Dest, T); | 1327 _movp(Dest, T); |
| 1388 } break; | 1328 } break; |
| 1389 case InstArithmetic::Fdiv: { | 1329 case InstArithmetic::Fdiv: { |
| 1390 Variable *T = makeReg(Dest->getType()); | 1330 Variable *T = makeReg(Dest->getType()); |
| 1391 _movp(T, Src0); | 1331 _movp(T, Src0); |
| 1392 _divps(T, LEGAL_HACK(Src1)); | 1332 _divps(T, LEGAL_HACK(Src1)); |
| 1393 _movp(Dest, T); | 1333 _movp(Dest, T); |
| 1394 } break; | 1334 } break; |
| 1395 case InstArithmetic::Frem: { | 1335 case InstArithmetic::Frem: |
| 1396 const SizeT MaxSrcs = 2; | 1336 scalarizeArithmetic(InstArithmetic::Frem, Dest, Src0, Src1); |
|
Jim Stichnoth
2014/08/07 19:58:10
Consider using Inst->getOp() instead of ::Frem?
wala
2014/08/07 20:30:35
Done.
| |
| 1397 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs); | 1337 break; |
| 1398 Call->addArg(Src0); | |
| 1399 Call->addArg(Src1); | |
| 1400 lowerCall(Call); | |
| 1401 } break; | |
| 1402 } | 1338 } |
| 1403 #undef LEGAL_HACK | 1339 #undef LEGAL_HACK |
| 1404 } else { // Dest->getType() is non-i64 scalar | 1340 } else { // Dest->getType() is non-i64 scalar |
| 1405 Variable *T_edx = NULL; | 1341 Variable *T_edx = NULL; |
| 1406 Variable *T = NULL; | 1342 Variable *T = NULL; |
| 1407 switch (Inst->getOp()) { | 1343 switch (Inst->getOp()) { |
| 1408 case InstArithmetic::_num: | 1344 case InstArithmetic::_num: |
| 1409 llvm_unreachable("Unknown arithmetic operator"); | 1345 llvm_unreachable("Unknown arithmetic operator"); |
| 1410 break; | 1346 break; |
| 1411 case InstArithmetic::Add: | 1347 case InstArithmetic::Add: |
| (...skipping 71 matching lines...) | |
| 1483 } else { | 1419 } else { |
| 1484 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1420 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1485 _mov(T, Src0, Reg_eax); | 1421 _mov(T, Src0, Reg_eax); |
| 1486 _mov(T_edx, Zero, Reg_edx); | 1422 _mov(T_edx, Zero, Reg_edx); |
| 1487 _div(T, Src1, T_edx); | 1423 _div(T, Src1, T_edx); |
| 1488 _mov(Dest, T); | 1424 _mov(Dest, T); |
| 1489 } | 1425 } |
| 1490 break; | 1426 break; |
| 1491 case InstArithmetic::Sdiv: | 1427 case InstArithmetic::Sdiv: |
| 1492 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1428 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1493 T_edx = makeReg(IceType_i32, Reg_edx); | 1429 if (Dest->getType() == IceType_i8) { |
| 1494 _mov(T, Src0, Reg_eax); | 1430 _mov(T, Src0, Reg_eax); |
| 1495 _cdq(T_edx, T); | 1431 _cbwdq(T, T); |
| 1496 _idiv(T, Src1, T_edx); | 1432 _idiv(T, Src1, T); |
| 1497 _mov(Dest, T); | 1433 _mov(Dest, T); |
| 1434 } else { | |
| 1435 T_edx = makeReg(IceType_i32, Reg_edx); | |
| 1436 _mov(T, Src0, Reg_eax); | |
| 1437 _cbwdq(T_edx, T); | |
| 1438 _idiv(T, Src1, T_edx); | |
| 1439 _mov(Dest, T); | |
| 1440 } | |
| 1498 break; | 1441 break; |
| 1499 case InstArithmetic::Urem: | 1442 case InstArithmetic::Urem: |
| 1500 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1443 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1501 if (Dest->getType() == IceType_i8) { | 1444 if (Dest->getType() == IceType_i8) { |
| 1502 Variable *T_ah = NULL; | 1445 Variable *T_ah = NULL; |
| 1503 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1446 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
| 1504 _mov(T, Src0, Reg_eax); | 1447 _mov(T, Src0, Reg_eax); |
| 1505 _mov(T_ah, Zero, Reg_ah); | 1448 _mov(T_ah, Zero, Reg_ah); |
| 1506 _div(T_ah, Src1, T); | 1449 _div(T_ah, Src1, T); |
| 1507 _mov(Dest, T_ah); | 1450 _mov(Dest, T_ah); |
| 1508 } else { | 1451 } else { |
| 1509 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1452 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1510 _mov(T_edx, Zero, Reg_edx); | 1453 _mov(T_edx, Zero, Reg_edx); |
| 1511 _mov(T, Src0, Reg_eax); | 1454 _mov(T, Src0, Reg_eax); |
| 1512 _div(T_edx, Src1, T); | 1455 _div(T_edx, Src1, T); |
| 1513 _mov(Dest, T_edx); | 1456 _mov(Dest, T_edx); |
| 1514 } | 1457 } |
| 1515 break; | 1458 break; |
| 1516 case InstArithmetic::Srem: | 1459 case InstArithmetic::Srem: |
| 1517 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1460 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1518 T_edx = makeReg(IceType_i32, Reg_edx); | 1461 if (Dest->getType() == IceType_i8) { |
| 1519 _mov(T, Src0, Reg_eax); | 1462 Variable *T_ah = makeReg(IceType_i8, Reg_ah); |
| 1520 _cdq(T_edx, T); | 1463 _mov(T, Src0, Reg_eax); |
| 1521 _idiv(T_edx, Src1, T); | 1464 _cbwdq(T, T); |
| 1522 _mov(Dest, T_edx); | 1465 Context.insert(InstFakeDef::create(Func, T_ah)); |
| 1466 _idiv(T_ah, Src1, T); | |
| 1467 _mov(Dest, T_ah); | |
| 1468 } else { | |
| 1469 T_edx = makeReg(IceType_i32, Reg_edx); | |
| 1470 _mov(T, Src0, Reg_eax); | |
| 1471 _cbwdq(T_edx, T); | |
| 1472 _idiv(T_edx, Src1, T); | |
| 1473 _mov(Dest, T_edx); | |
| 1474 } | |
| 1523 break; | 1475 break; |
| 1524 case InstArithmetic::Fadd: | 1476 case InstArithmetic::Fadd: |
| 1525 _mov(T, Src0); | 1477 _mov(T, Src0); |
| 1526 _addss(T, Src1); | 1478 _addss(T, Src1); |
| 1527 _mov(Dest, T); | 1479 _mov(Dest, T); |
| 1528 break; | 1480 break; |
| 1529 case InstArithmetic::Fsub: | 1481 case InstArithmetic::Fsub: |
| 1530 _mov(T, Src0); | 1482 _mov(T, Src0); |
| 1531 _subss(T, Src1); | 1483 _subss(T, Src1); |
| 1532 _mov(Dest, T); | 1484 _mov(Dest, T); |
| (...skipping 2204 matching lines...) | |
| 3737 Src0 = legalize(Src0, Legal_All, true); | 3689 Src0 = legalize(Src0, Legal_All, true); |
| 3738 for (SizeT I = 0; I < NumCases; ++I) { | 3690 for (SizeT I = 0; I < NumCases; ++I) { |
| 3739 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); | 3691 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); |
| 3740 _cmp(Src0, Value); | 3692 _cmp(Src0, Value); |
| 3741 _br(InstX8632Br::Br_e, Inst->getLabel(I)); | 3693 _br(InstX8632Br::Br_e, Inst->getLabel(I)); |
| 3742 } | 3694 } |
| 3743 | 3695 |
| 3744 _br(Inst->getLabelDefault()); | 3696 _br(Inst->getLabelDefault()); |
| 3745 } | 3697 } |
| 3746 | 3698 |
| 3699 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, | |
|
Jim Stichnoth
2014/08/07 19:58:10
Name the arg "Kind", or something else more descriptive.
wala
2014/08/07 20:30:35
Done.
| |
| 3700 Operand *Src0, Operand *Src1) { | |
| 3701 assert(isVectorType(Dest->getType())); | |
| 3702 Type Ty = Dest->getType(); | |
| 3703 Type ElementTy = typeElementType(Ty); | |
| 3704 SizeT NumElements = typeNumElements(Ty); | |
| 3705 | |
| 3706 Operand *T = Ctx->getConstantUndef(Ty); | |
| 3707 for (SizeT I = 0; I < NumElements; ++I) { | |
| 3708 Constant *Index = Ctx->getConstantInt(IceType_i32, I); | |
| 3709 | |
| 3710 // Extract the next two inputs. | |
| 3711 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode()); | |
| 3712 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); | |
| 3713 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode()); | |
| 3714 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); | |
| 3715 | |
| 3716 // Perform the arithmetic as a scalar operation. | |
| 3717 Variable *Res = Func->makeVariable(ElementTy, Context.getNode()); | |
| 3718 lowerArithmetic(InstArithmetic::create(Func, K, Res, Op0, Op1)); | |
| 3719 | |
| 3720 // Insert the result into position. | |
| 3721 Variable *DestT = Func->makeVariable(Ty, Context.getNode()); | |
| 3722 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | |
| 3723 T = DestT; | |
| 3724 // TODO: Use postLower() in -Om1 mode to avoid buildup of infinite | |
| 3725 // weight temporaries. | |
| 3726 } | |
| 3727 | |
| 3728 lowerAssign(InstAssign::create(Func, Dest, T)); | |
| 3729 } | |
| 3730 | |
| 3747 // The following pattern occurs often in lowered C and C++ code: | 3731 // The following pattern occurs often in lowered C and C++ code: |
| 3748 // | 3732 // |
| 3749 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 3733 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 3750 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 3734 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 3751 // | 3735 // |
| 3752 // We can eliminate the sext operation by copying the result of pcmpeqd, | 3736 // We can eliminate the sext operation by copying the result of pcmpeqd, |
| 3753 // pcmpgtd, or cmpps (which produce sign extended results) to the result | 3737 // pcmpgtd, or cmpps (which produce sign extended results) to the result |
| 3754 // of the sext operation. | 3738 // of the sext operation. |
| 3755 void | 3739 void |
| 3756 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { | 3740 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { |
| (...skipping 406 matching lines...) | |
| 4163 for (SizeT i = 0; i < Size; ++i) { | 4147 for (SizeT i = 0; i < Size; ++i) { |
| 4164 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4148 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 4165 } | 4149 } |
| 4166 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4150 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 4167 } | 4151 } |
| 4168 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4152 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
| 4169 << "\n"; | 4153 << "\n"; |
| 4170 } | 4154 } |
| 4171 | 4155 |
| 4172 } // end of namespace Ice | 4156 } // end of namespace Ice |
| OLD | NEW |