| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1485 _movp(Dest, T); | 1485 _movp(Dest, T); |
| 1486 } break; | 1486 } break; |
| 1487 case InstArithmetic::Mul: { | 1487 case InstArithmetic::Mul: { |
| 1488 bool TypesAreValidForPmull = | 1488 bool TypesAreValidForPmull = |
| 1489 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | 1489 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; |
| 1490 bool InstructionSetIsValidForPmull = | 1490 bool InstructionSetIsValidForPmull = |
| 1491 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; | 1491 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; |
| 1492 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 1492 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
| 1493 Variable *T = makeReg(Dest->getType()); | 1493 Variable *T = makeReg(Dest->getType()); |
| 1494 _movp(T, Src0); | 1494 _movp(T, Src0); |
| 1495 _pmull(T, Src1); | 1495 _pmull(T, Src0 == Src1 ? T : Src1); |
| 1496 _movp(Dest, T); | 1496 _movp(Dest, T); |
| 1497 } else if (Dest->getType() == IceType_v4i32) { | 1497 } else if (Dest->getType() == IceType_v4i32) { |
| 1498 // Lowering sequence: | 1498 // Lowering sequence: |
| 1499 // Note: The mask arguments have index 0 on the left. | 1499 // Note: The mask arguments have index 0 on the left. |
| 1500 // | 1500 // |
| 1501 // movups T1, Src0 | 1501 // movups T1, Src0 |
| 1502 // pshufd T2, Src0, {1,0,3,0} | 1502 // pshufd T2, Src0, {1,0,3,0} |
| 1503 // pshufd T3, Src1, {1,0,3,0} | 1503 // pshufd T3, Src1, {1,0,3,0} |
| 1504 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} | 1504 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} |
| 1505 // pmuludq T1, Src1 | 1505 // pmuludq T1, Src1 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 1525 Variable *T3 = makeReg(IceType_v4i32); | 1525 Variable *T3 = makeReg(IceType_v4i32); |
| 1526 Variable *T4 = makeReg(IceType_v4i32); | 1526 Variable *T4 = makeReg(IceType_v4i32); |
| 1527 _movp(T1, Src0); | 1527 _movp(T1, Src0); |
| 1528 _pshufd(T2, Src0, Mask1030); | 1528 _pshufd(T2, Src0, Mask1030); |
| 1529 _pshufd(T3, Src1, Mask1030); | 1529 _pshufd(T3, Src1, Mask1030); |
| 1530 _pmuludq(T1, Src1); | 1530 _pmuludq(T1, Src1); |
| 1531 _pmuludq(T2, T3); | 1531 _pmuludq(T2, T3); |
| 1532 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); | 1532 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); |
| 1533 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); | 1533 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); |
| 1534 _movp(Dest, T4); | 1534 _movp(Dest, T4); |
| 1535 } else if (Dest->getType() == IceType_v16i8) { |
| 1536 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1535 } else { | 1537 } else { |
| 1536 assert(Dest->getType() == IceType_v16i8); | 1538 llvm::report_fatal_error("Invalid vector multiply type"); |
| 1537 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | |
| 1538 } | 1539 } |
| 1539 } break; | 1540 } break; |
| 1540 case InstArithmetic::Shl: | 1541 case InstArithmetic::Shl: |
| 1541 case InstArithmetic::Lshr: | 1542 case InstArithmetic::Lshr: |
| 1542 case InstArithmetic::Ashr: | 1543 case InstArithmetic::Ashr: |
| 1543 case InstArithmetic::Udiv: | 1544 case InstArithmetic::Udiv: |
| 1544 case InstArithmetic::Urem: | 1545 case InstArithmetic::Urem: |
| 1545 case InstArithmetic::Sdiv: | 1546 case InstArithmetic::Sdiv: |
| 1546 case InstArithmetic::Srem: | 1547 case InstArithmetic::Srem: |
| 1547 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1548 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1548 break; | 1549 break; |
| 1549 case InstArithmetic::Fadd: { | 1550 case InstArithmetic::Fadd: { |
| 1550 Variable *T = makeReg(Dest->getType()); | 1551 Variable *T = makeReg(Dest->getType()); |
| 1551 _movp(T, Src0); | 1552 _movp(T, Src0); |
| 1552 _addps(T, Src1); | 1553 _addps(T, Src1); |
| 1553 _movp(Dest, T); | 1554 _movp(Dest, T); |
| 1554 } break; | 1555 } break; |
| 1555 case InstArithmetic::Fsub: { | 1556 case InstArithmetic::Fsub: { |
| 1556 Variable *T = makeReg(Dest->getType()); | 1557 Variable *T = makeReg(Dest->getType()); |
| 1557 _movp(T, Src0); | 1558 _movp(T, Src0); |
| 1558 _subps(T, Src1); | 1559 _subps(T, Src1); |
| 1559 _movp(Dest, T); | 1560 _movp(Dest, T); |
| 1560 } break; | 1561 } break; |
| 1561 case InstArithmetic::Fmul: { | 1562 case InstArithmetic::Fmul: { |
| 1562 Variable *T = makeReg(Dest->getType()); | 1563 Variable *T = makeReg(Dest->getType()); |
| 1563 _movp(T, Src0); | 1564 _movp(T, Src0); |
| 1564 _mulps(T, Src1); | 1565 _mulps(T, Src0 == Src1 ? T : Src1); |
| 1565 _movp(Dest, T); | 1566 _movp(Dest, T); |
| 1566 } break; | 1567 } break; |
| 1567 case InstArithmetic::Fdiv: { | 1568 case InstArithmetic::Fdiv: { |
| 1568 Variable *T = makeReg(Dest->getType()); | 1569 Variable *T = makeReg(Dest->getType()); |
| 1569 _movp(T, Src0); | 1570 _movp(T, Src0); |
| 1570 _divps(T, Src1); | 1571 _divps(T, Src1); |
| 1571 _movp(Dest, T); | 1572 _movp(Dest, T); |
| 1572 } break; | 1573 } break; |
| 1573 case InstArithmetic::Frem: | 1574 case InstArithmetic::Frem: |
| 1574 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1575 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1613 return; | 1614 return; |
| 1614 } | 1615 } |
| 1615 // The 8-bit version of imul only allows the form "imul r/m8" where T must | 1616 // The 8-bit version of imul only allows the form "imul r/m8" where T must |
| 1616 // be in eax. | 1617 // be in eax. |
| 1617 if (isByteSizedArithType(Dest->getType())) { | 1618 if (isByteSizedArithType(Dest->getType())) { |
| 1618 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1619 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1619 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1620 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1620 } else { | 1621 } else { |
| 1621 _mov(T, Src0); | 1622 _mov(T, Src0); |
| 1622 } | 1623 } |
| 1623 _imul(T, Src1); | 1624 _imul(T, Src0 == Src1 ? T : Src1); |
| 1624 _mov(Dest, T); | 1625 _mov(Dest, T); |
| 1625 break; | 1626 break; |
| 1626 case InstArithmetic::Shl: | 1627 case InstArithmetic::Shl: |
| 1627 _mov(T, Src0); | 1628 _mov(T, Src0); |
| 1628 if (!llvm::isa<ConstantInteger32>(Src1)) | 1629 if (!llvm::isa<ConstantInteger32>(Src1)) |
| 1629 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1630 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); |
| 1630 _shl(T, Src1); | 1631 _shl(T, Src1); |
| 1631 _mov(Dest, T); | 1632 _mov(Dest, T); |
| 1632 break; | 1633 break; |
| 1633 case InstArithmetic::Lshr: | 1634 case InstArithmetic::Lshr: |
| (...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1819 _addss(T, Src1); | 1820 _addss(T, Src1); |
| 1820 _mov(Dest, T); | 1821 _mov(Dest, T); |
| 1821 break; | 1822 break; |
| 1822 case InstArithmetic::Fsub: | 1823 case InstArithmetic::Fsub: |
| 1823 _mov(T, Src0); | 1824 _mov(T, Src0); |
| 1824 _subss(T, Src1); | 1825 _subss(T, Src1); |
| 1825 _mov(Dest, T); | 1826 _mov(Dest, T); |
| 1826 break; | 1827 break; |
| 1827 case InstArithmetic::Fmul: | 1828 case InstArithmetic::Fmul: |
| 1828 _mov(T, Src0); | 1829 _mov(T, Src0); |
| 1829 _mulss(T, Src1); | 1830 _mulss(T, Src0 == Src1 ? T : Src1); |
| 1830 _mov(Dest, T); | 1831 _mov(Dest, T); |
| 1831 break; | 1832 break; |
| 1832 case InstArithmetic::Fdiv: | 1833 case InstArithmetic::Fdiv: |
| 1833 _mov(T, Src0); | 1834 _mov(T, Src0); |
| 1834 _divss(T, Src1); | 1835 _divss(T, Src1); |
| 1835 _mov(Dest, T); | 1836 _mov(Dest, T); |
| 1836 break; | 1837 break; |
| 1837 case InstArithmetic::Frem: { | 1838 case InstArithmetic::Frem: { |
| 1838 const SizeT MaxSrcs = 2; | 1839 const SizeT MaxSrcs = 2; |
| 1839 Type Ty = Dest->getType(); | 1840 Type Ty = Dest->getType(); |
| (...skipping 3655 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5495 } | 5496 } |
| 5496 // the offset is not eligible for blinding or pooling, return the original | 5497 // the offset is not eligible for blinding or pooling, return the original |
| 5497 // mem operand | 5498 // mem operand |
| 5498 return MemOperand; | 5499 return MemOperand; |
| 5499 } | 5500 } |
| 5500 | 5501 |
| 5501 } // end of namespace X86Internal | 5502 } // end of namespace X86Internal |
| 5502 } // end of namespace Ice | 5503 } // end of namespace Ice |
| 5503 | 5504 |
| 5504 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |