src/IceTargetLoweringX86BaseImpl.h - Issue 1377213004: Subzero: Improve lowering sequence for "a=b*b".

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1377213004: Subzero: Improve lowering sequence for "a=b*b". (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Fix comment Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1485 _movp(Dest, T);	1485 _movp(Dest, T);

1486 } break;	1486 } break;

1487 case InstArithmetic::Mul: {	1487 case InstArithmetic::Mul: {

1488 bool TypesAreValidForPmull =	1488 bool TypesAreValidForPmull =

1489 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;	1489 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;

1490 bool InstructionSetIsValidForPmull =	1490 bool InstructionSetIsValidForPmull =

1491 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;	1491 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;

1492 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {	1492 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {

1493 Variable *T = makeReg(Dest->getType());	1493 Variable *T = makeReg(Dest->getType());

1494 _movp(T, Src0);	1494 _movp(T, Src0);

1495 _pmull(T, Src1);	1495 _pmull(T, Src0 == Src1 ? T : Src1);

1496 _movp(Dest, T);	1496 _movp(Dest, T);

1497 } else if (Dest->getType() == IceType_v4i32) {	1497 } else if (Dest->getType() == IceType_v4i32) {

1498 // Lowering sequence:	1498 // Lowering sequence:

1499 // Note: The mask arguments have index 0 on the left.	1499 // Note: The mask arguments have index 0 on the left.

1500 //	1500 //

1501 // movups T1, Src0	1501 // movups T1, Src0

1502 // pshufd T2, Src0, {1,0,3,0}	1502 // pshufd T2, Src0, {1,0,3,0}

1503 // pshufd T3, Src1, {1,0,3,0}	1503 // pshufd T3, Src1, {1,0,3,0}

1504 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}	1504 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}

1505 // pmuludq T1, Src1	1505 // pmuludq T1, Src1

(...skipping 19 matching lines...) Expand all Loading...
1525 Variable *T3 = makeReg(IceType_v4i32);	1525 Variable *T3 = makeReg(IceType_v4i32);

1526 Variable *T4 = makeReg(IceType_v4i32);	1526 Variable *T4 = makeReg(IceType_v4i32);

1527 _movp(T1, Src0);	1527 _movp(T1, Src0);

1528 _pshufd(T2, Src0, Mask1030);	1528 _pshufd(T2, Src0, Mask1030);

1529 _pshufd(T3, Src1, Mask1030);	1529 _pshufd(T3, Src1, Mask1030);

1530 _pmuludq(T1, Src1);	1530 _pmuludq(T1, Src1);

1531 _pmuludq(T2, T3);	1531 _pmuludq(T2, T3);

1532 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));	1532 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));

1533 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));	1533 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));

1534 _movp(Dest, T4);	1534 _movp(Dest, T4);

	1535 } else if (Dest->getType() == IceType_v16i8) {

	1536 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1535 } else {	1537 } else {

1536 assert(Dest->getType() == IceType_v16i8);	1538 llvm::report_fatal_error("Invalid vector multiply type");

1537 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1538 }	1539 }

1539 } break;	1540 } break;

1540 case InstArithmetic::Shl:	1541 case InstArithmetic::Shl:

1541 case InstArithmetic::Lshr:	1542 case InstArithmetic::Lshr:

1542 case InstArithmetic::Ashr:	1543 case InstArithmetic::Ashr:

1543 case InstArithmetic::Udiv:	1544 case InstArithmetic::Udiv:

1544 case InstArithmetic::Urem:	1545 case InstArithmetic::Urem:

1545 case InstArithmetic::Sdiv:	1546 case InstArithmetic::Sdiv:

1546 case InstArithmetic::Srem:	1547 case InstArithmetic::Srem:

1547 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1548 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1548 break;	1549 break;

1549 case InstArithmetic::Fadd: {	1550 case InstArithmetic::Fadd: {

1550 Variable *T = makeReg(Dest->getType());	1551 Variable *T = makeReg(Dest->getType());

1551 _movp(T, Src0);	1552 _movp(T, Src0);

1552 _addps(T, Src1);	1553 _addps(T, Src1);

1553 _movp(Dest, T);	1554 _movp(Dest, T);

1554 } break;	1555 } break;

1555 case InstArithmetic::Fsub: {	1556 case InstArithmetic::Fsub: {

1556 Variable *T = makeReg(Dest->getType());	1557 Variable *T = makeReg(Dest->getType());

1557 _movp(T, Src0);	1558 _movp(T, Src0);

1558 _subps(T, Src1);	1559 _subps(T, Src1);

1559 _movp(Dest, T);	1560 _movp(Dest, T);

1560 } break;	1561 } break;

1561 case InstArithmetic::Fmul: {	1562 case InstArithmetic::Fmul: {

1562 Variable *T = makeReg(Dest->getType());	1563 Variable *T = makeReg(Dest->getType());

1563 _movp(T, Src0);	1564 _movp(T, Src0);

1564 _mulps(T, Src1);	1565 _mulps(T, Src0 == Src1 ? T : Src1);

1565 _movp(Dest, T);	1566 _movp(Dest, T);

1566 } break;	1567 } break;

1567 case InstArithmetic::Fdiv: {	1568 case InstArithmetic::Fdiv: {

1568 Variable *T = makeReg(Dest->getType());	1569 Variable *T = makeReg(Dest->getType());

1569 _movp(T, Src0);	1570 _movp(T, Src0);

1570 _divps(T, Src1);	1571 _divps(T, Src1);

1571 _movp(Dest, T);	1572 _movp(Dest, T);

1572 } break;	1573 } break;

1573 case InstArithmetic::Frem:	1574 case InstArithmetic::Frem:

1574 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1575 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1613 return;	1614 return;

1614 }	1615 }

1615 // The 8-bit version of imul only allows the form "imul r/m8" where T must	1616 // The 8-bit version of imul only allows the form "imul r/m8" where T must

1616 // be in eax.	1617 // be in eax.

1617 if (isByteSizedArithType(Dest->getType())) {	1618 if (isByteSizedArithType(Dest->getType())) {

1618 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1619 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1619 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1620 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1620 } else {	1621 } else {

1621 _mov(T, Src0);	1622 _mov(T, Src0);

1622 }	1623 }

1623 _imul(T, Src1);	1624 _imul(T, Src0 == Src1 ? T : Src1);

1624 _mov(Dest, T);	1625 _mov(Dest, T);

1625 break;	1626 break;

1626 case InstArithmetic::Shl:	1627 case InstArithmetic::Shl:

1627 _mov(T, Src0);	1628 _mov(T, Src0);

1628 if (!llvm::isa<ConstantInteger32>(Src1))	1629 if (!llvm::isa<ConstantInteger32>(Src1))

1629 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);	1630 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);

1630 _shl(T, Src1);	1631 _shl(T, Src1);

1631 _mov(Dest, T);	1632 _mov(Dest, T);

1632 break;	1633 break;

1633 case InstArithmetic::Lshr:	1634 case InstArithmetic::Lshr:

(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1819 _addss(T, Src1);	1820 _addss(T, Src1);

1820 _mov(Dest, T);	1821 _mov(Dest, T);

1821 break;	1822 break;

1822 case InstArithmetic::Fsub:	1823 case InstArithmetic::Fsub:

1823 _mov(T, Src0);	1824 _mov(T, Src0);

1824 _subss(T, Src1);	1825 _subss(T, Src1);

1825 _mov(Dest, T);	1826 _mov(Dest, T);

1826 break;	1827 break;

1827 case InstArithmetic::Fmul:	1828 case InstArithmetic::Fmul:

1828 _mov(T, Src0);	1829 _mov(T, Src0);

1829 _mulss(T, Src1);	1830 _mulss(T, Src0 == Src1 ? T : Src1);

1830 _mov(Dest, T);	1831 _mov(Dest, T);

1831 break;	1832 break;

1832 case InstArithmetic::Fdiv:	1833 case InstArithmetic::Fdiv:

1833 _mov(T, Src0);	1834 _mov(T, Src0);

1834 _divss(T, Src1);	1835 _divss(T, Src1);

1835 _mov(Dest, T);	1836 _mov(Dest, T);

1836 break;	1837 break;

1837 case InstArithmetic::Frem: {	1838 case InstArithmetic::Frem: {

1838 const SizeT MaxSrcs = 2;	1839 const SizeT MaxSrcs = 2;

1839 Type Ty = Dest->getType();	1840 Type Ty = Dest->getType();

(...skipping 3655 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5495 }	5496 }

5496 // the offset is not eligible for blinding or pooling, return the original	5497 // the offset is not eligible for blinding or pooling, return the original

5497 // mem operand	5498 // mem operand

5498 return MemOperand;	5499 return MemOperand;

5499 }	5500 }

5500	5501

5501 } // end of namespace X86Internal	5502 } // end of namespace X86Internal

5502 } // end of namespace Ice	5503 } // end of namespace Ice

5503	5504

5504 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« no previous file with comments | « no previous file | tests_lit/llvm2ice_tests/square.ll » ('j') | no next file with comments »