OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 1278 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1289 Variable *Src1R = LEGAL_HACK(Src1); | 1289 Variable *Src1R = LEGAL_HACK(Src1); |
1290 _pshufd(T2, Src0R, Mask1030); | 1290 _pshufd(T2, Src0R, Mask1030); |
1291 _pshufd(T3, Src1R, Mask1030); | 1291 _pshufd(T3, Src1R, Mask1030); |
1292 _pmuludq(T1, Src1R); | 1292 _pmuludq(T1, Src1R); |
1293 _pmuludq(T2, T3); | 1293 _pmuludq(T2, T3); |
1294 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); | 1294 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
1295 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); | 1295 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
1296 _movp(Dest, T4); | 1296 _movp(Dest, T4); |
1297 } else { | 1297 } else { |
1298 assert(Dest->getType() == IceType_v16i8); | 1298 assert(Dest->getType() == IceType_v16i8); |
1299 // Sz_mul_v16i8 | 1299 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1300 const IceString Helper = "Sz_mul_v16i8"; | |
1301 const SizeT MaxSrcs = 2; | |
1302 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1303 Call->addArg(Src0); | |
1304 Call->addArg(Src1); | |
1305 lowerCall(Call); | |
1306 } | 1300 } |
1307 } break; | 1301 } break; |
1308 case InstArithmetic::Shl: { | 1302 case InstArithmetic::Shl: |
1309 // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8 | 1303 case InstArithmetic::Lshr: |
1310 const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType()); | 1304 case InstArithmetic::Ashr: |
1311 const SizeT MaxSrcs = 2; | 1305 case InstArithmetic::Udiv: |
1312 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | 1306 case InstArithmetic::Urem: |
1313 Call->addArg(Src0); | 1307 case InstArithmetic::Sdiv: |
1314 Call->addArg(Src1); | 1308 case InstArithmetic::Srem: |
1315 lowerCall(Call); | 1309 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1316 } break; | 1310 break; |
1317 case InstArithmetic::Lshr: { | |
1318 // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8 | |
1319 const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType()); | |
1320 const SizeT MaxSrcs = 2; | |
1321 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1322 Call->addArg(Src0); | |
1323 Call->addArg(Src1); | |
1324 lowerCall(Call); | |
1325 } break; | |
1326 case InstArithmetic::Ashr: { | |
1327 // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8 | |
1328 const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType()); | |
1329 const SizeT MaxSrcs = 2; | |
1330 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1331 Call->addArg(Src0); | |
1332 Call->addArg(Src1); | |
1333 lowerCall(Call); | |
1334 } break; | |
1335 case InstArithmetic::Udiv: { | |
1336 // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8 | |
1337 const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType()); | |
1338 const SizeT MaxSrcs = 2; | |
1339 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1340 Call->addArg(Src0); | |
1341 Call->addArg(Src1); | |
1342 lowerCall(Call); | |
1343 } break; | |
1344 case InstArithmetic::Sdiv: { | |
1345 // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8 | |
1346 const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType()); | |
1347 const SizeT MaxSrcs = 2; | |
1348 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1349 Call->addArg(Src0); | |
1350 Call->addArg(Src1); | |
1351 lowerCall(Call); | |
1352 } break; | |
1353 case InstArithmetic::Urem: { | |
1354 // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8 | |
1355 const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType()); | |
1356 const SizeT MaxSrcs = 2; | |
1357 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1358 Call->addArg(Src0); | |
1359 Call->addArg(Src1); | |
1360 lowerCall(Call); | |
1361 } break; | |
1362 case InstArithmetic::Srem: { | |
1363 // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8 | |
1364 const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType()); | |
1365 const SizeT MaxSrcs = 2; | |
1366 InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs); | |
1367 Call->addArg(Src0); | |
1368 Call->addArg(Src1); | |
1369 lowerCall(Call); | |
1370 } break; | |
1371 case InstArithmetic::Fadd: { | 1311 case InstArithmetic::Fadd: { |
1372 Variable *T = makeReg(Dest->getType()); | 1312 Variable *T = makeReg(Dest->getType()); |
1373 _movp(T, Src0); | 1313 _movp(T, Src0); |
1374 _addps(T, LEGAL_HACK(Src1)); | 1314 _addps(T, LEGAL_HACK(Src1)); |
1375 _movp(Dest, T); | 1315 _movp(Dest, T); |
1376 } break; | 1316 } break; |
1377 case InstArithmetic::Fsub: { | 1317 case InstArithmetic::Fsub: { |
1378 Variable *T = makeReg(Dest->getType()); | 1318 Variable *T = makeReg(Dest->getType()); |
1379 _movp(T, Src0); | 1319 _movp(T, Src0); |
1380 _subps(T, LEGAL_HACK(Src1)); | 1320 _subps(T, LEGAL_HACK(Src1)); |
1381 _movp(Dest, T); | 1321 _movp(Dest, T); |
1382 } break; | 1322 } break; |
1383 case InstArithmetic::Fmul: { | 1323 case InstArithmetic::Fmul: { |
1384 Variable *T = makeReg(Dest->getType()); | 1324 Variable *T = makeReg(Dest->getType()); |
1385 _movp(T, Src0); | 1325 _movp(T, Src0); |
1386 _mulps(T, LEGAL_HACK(Src1)); | 1326 _mulps(T, LEGAL_HACK(Src1)); |
1387 _movp(Dest, T); | 1327 _movp(Dest, T); |
1388 } break; | 1328 } break; |
1389 case InstArithmetic::Fdiv: { | 1329 case InstArithmetic::Fdiv: { |
1390 Variable *T = makeReg(Dest->getType()); | 1330 Variable *T = makeReg(Dest->getType()); |
1391 _movp(T, Src0); | 1331 _movp(T, Src0); |
1392 _divps(T, LEGAL_HACK(Src1)); | 1332 _divps(T, LEGAL_HACK(Src1)); |
1393 _movp(Dest, T); | 1333 _movp(Dest, T); |
1394 } break; | 1334 } break; |
1395 case InstArithmetic::Frem: { | 1335 case InstArithmetic::Frem: |
1396 const SizeT MaxSrcs = 2; | 1336 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1397 InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs); | 1337 break; |
1398 Call->addArg(Src0); | |
1399 Call->addArg(Src1); | |
1400 lowerCall(Call); | |
1401 } break; | |
1402 } | 1338 } |
1403 #undef LEGAL_HACK | 1339 #undef LEGAL_HACK |
1404 } else { // Dest->getType() is non-i64 scalar | 1340 } else { // Dest->getType() is non-i64 scalar |
1405 Variable *T_edx = NULL; | 1341 Variable *T_edx = NULL; |
1406 Variable *T = NULL; | 1342 Variable *T = NULL; |
1407 switch (Inst->getOp()) { | 1343 switch (Inst->getOp()) { |
1408 case InstArithmetic::_num: | 1344 case InstArithmetic::_num: |
1409 llvm_unreachable("Unknown arithmetic operator"); | 1345 llvm_unreachable("Unknown arithmetic operator"); |
1410 break; | 1346 break; |
1411 case InstArithmetic::Add: | 1347 case InstArithmetic::Add: |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1483 } else { | 1419 } else { |
1484 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1420 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1485 _mov(T, Src0, Reg_eax); | 1421 _mov(T, Src0, Reg_eax); |
1486 _mov(T_edx, Zero, Reg_edx); | 1422 _mov(T_edx, Zero, Reg_edx); |
1487 _div(T, Src1, T_edx); | 1423 _div(T, Src1, T_edx); |
1488 _mov(Dest, T); | 1424 _mov(Dest, T); |
1489 } | 1425 } |
1490 break; | 1426 break; |
1491 case InstArithmetic::Sdiv: | 1427 case InstArithmetic::Sdiv: |
1492 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1428 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1493 T_edx = makeReg(IceType_i32, Reg_edx); | 1429 if (Dest->getType() == IceType_i8) { |
1494 _mov(T, Src0, Reg_eax); | 1430 _mov(T, Src0, Reg_eax); |
1495 _cdq(T_edx, T); | 1431 _cbwdq(T, T); |
1496 _idiv(T, Src1, T_edx); | 1432 _idiv(T, Src1, T); |
1497 _mov(Dest, T); | 1433 _mov(Dest, T); |
| 1434 } else { |
| 1435 T_edx = makeReg(IceType_i32, Reg_edx); |
| 1436 _mov(T, Src0, Reg_eax); |
| 1437 _cbwdq(T_edx, T); |
| 1438 _idiv(T, Src1, T_edx); |
| 1439 _mov(Dest, T); |
| 1440 } |
1498 break; | 1441 break; |
1499 case InstArithmetic::Urem: | 1442 case InstArithmetic::Urem: |
1500 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1443 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1501 if (Dest->getType() == IceType_i8) { | 1444 if (Dest->getType() == IceType_i8) { |
1502 Variable *T_ah = NULL; | 1445 Variable *T_ah = NULL; |
1503 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1446 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
1504 _mov(T, Src0, Reg_eax); | 1447 _mov(T, Src0, Reg_eax); |
1505 _mov(T_ah, Zero, Reg_ah); | 1448 _mov(T_ah, Zero, Reg_ah); |
1506 _div(T_ah, Src1, T); | 1449 _div(T_ah, Src1, T); |
1507 _mov(Dest, T_ah); | 1450 _mov(Dest, T_ah); |
1508 } else { | 1451 } else { |
1509 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1452 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1510 _mov(T_edx, Zero, Reg_edx); | 1453 _mov(T_edx, Zero, Reg_edx); |
1511 _mov(T, Src0, Reg_eax); | 1454 _mov(T, Src0, Reg_eax); |
1512 _div(T_edx, Src1, T); | 1455 _div(T_edx, Src1, T); |
1513 _mov(Dest, T_edx); | 1456 _mov(Dest, T_edx); |
1514 } | 1457 } |
1515 break; | 1458 break; |
1516 case InstArithmetic::Srem: | 1459 case InstArithmetic::Srem: |
1517 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1460 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1518 T_edx = makeReg(IceType_i32, Reg_edx); | 1461 if (Dest->getType() == IceType_i8) { |
1519 _mov(T, Src0, Reg_eax); | 1462 Variable *T_ah = makeReg(IceType_i8, Reg_ah); |
1520 _cdq(T_edx, T); | 1463 _mov(T, Src0, Reg_eax); |
1521 _idiv(T_edx, Src1, T); | 1464 _cbwdq(T, T); |
1522 _mov(Dest, T_edx); | 1465 Context.insert(InstFakeDef::create(Func, T_ah)); |
| 1466 _idiv(T_ah, Src1, T); |
| 1467 _mov(Dest, T_ah); |
| 1468 } else { |
| 1469 T_edx = makeReg(IceType_i32, Reg_edx); |
| 1470 _mov(T, Src0, Reg_eax); |
| 1471 _cbwdq(T_edx, T); |
| 1472 _idiv(T_edx, Src1, T); |
| 1473 _mov(Dest, T_edx); |
| 1474 } |
1523 break; | 1475 break; |
1524 case InstArithmetic::Fadd: | 1476 case InstArithmetic::Fadd: |
1525 _mov(T, Src0); | 1477 _mov(T, Src0); |
1526 _addss(T, Src1); | 1478 _addss(T, Src1); |
1527 _mov(Dest, T); | 1479 _mov(Dest, T); |
1528 break; | 1480 break; |
1529 case InstArithmetic::Fsub: | 1481 case InstArithmetic::Fsub: |
1530 _mov(T, Src0); | 1482 _mov(T, Src0); |
1531 _subss(T, Src1); | 1483 _subss(T, Src1); |
1532 _mov(Dest, T); | 1484 _mov(Dest, T); |
(...skipping 2204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3737 Src0 = legalize(Src0, Legal_All, true); | 3689 Src0 = legalize(Src0, Legal_All, true); |
3738 for (SizeT I = 0; I < NumCases; ++I) { | 3690 for (SizeT I = 0; I < NumCases; ++I) { |
3739 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); | 3691 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); |
3740 _cmp(Src0, Value); | 3692 _cmp(Src0, Value); |
3741 _br(InstX8632Br::Br_e, Inst->getLabel(I)); | 3693 _br(InstX8632Br::Br_e, Inst->getLabel(I)); |
3742 } | 3694 } |
3743 | 3695 |
3744 _br(Inst->getLabelDefault()); | 3696 _br(Inst->getLabelDefault()); |
3745 } | 3697 } |
3746 | 3698 |
| 3699 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 3700 Variable *Dest, Operand *Src0, |
| 3701 Operand *Src1) { |
| 3702 assert(isVectorType(Dest->getType())); |
| 3703 Type Ty = Dest->getType(); |
| 3704 Type ElementTy = typeElementType(Ty); |
| 3705 SizeT NumElements = typeNumElements(Ty); |
| 3706 |
| 3707 Operand *T = Ctx->getConstantUndef(Ty); |
| 3708 for (SizeT I = 0; I < NumElements; ++I) { |
| 3709 Constant *Index = Ctx->getConstantInt(IceType_i32, I); |
| 3710 |
| 3711 // Extract the next two inputs. |
| 3712 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode()); |
| 3713 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); |
| 3714 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode()); |
| 3715 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); |
| 3716 |
| 3717 // Perform the arithmetic as a scalar operation. |
| 3718 Variable *Res = Func->makeVariable(ElementTy, Context.getNode()); |
| 3719 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); |
| 3720 |
| 3721 // Insert the result into position. |
| 3722 Variable *DestT = Func->makeVariable(Ty, Context.getNode()); |
| 3723 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); |
| 3724 T = DestT; |
| 3725 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of |
| 3726 // infinite weight temporaries. |
| 3727 } |
| 3728 |
| 3729 lowerAssign(InstAssign::create(Func, Dest, T)); |
| 3730 } |
| 3731 |
3747 // The following pattern occurs often in lowered C and C++ code: | 3732 // The following pattern occurs often in lowered C and C++ code: |
3748 // | 3733 // |
3749 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 3734 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
3750 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 3735 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
3751 // | 3736 // |
3752 // We can eliminate the sext operation by copying the result of pcmpeqd, | 3737 // We can eliminate the sext operation by copying the result of pcmpeqd, |
3753 // pcmpgtd, or cmpps (which produce sign extended results) to the result | 3738 // pcmpgtd, or cmpps (which produce sign extended results) to the result |
3754 // of the sext operation. | 3739 // of the sext operation. |
3755 void | 3740 void |
3756 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { | 3741 TargetX8632::eliminateNextVectorSextInstruction(Variable *SignExtendedResult) { |
(...skipping 406 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4163 for (SizeT i = 0; i < Size; ++i) { | 4148 for (SizeT i = 0; i < Size; ++i) { |
4164 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4149 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4165 } | 4150 } |
4166 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4151 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4167 } | 4152 } |
4168 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4153 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
4169 << "\n"; | 4154 << "\n"; |
4170 } | 4155 } |
4171 | 4156 |
4172 } // end of namespace Ice | 4157 } // end of namespace Ice |
OLD | NEW |