Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1377213004: Subzero: Improve lowering sequence for "a=b*b". (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix comment Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tests_lit/llvm2ice_tests/square.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after
1485 _movp(Dest, T); 1485 _movp(Dest, T);
1486 } break; 1486 } break;
1487 case InstArithmetic::Mul: { 1487 case InstArithmetic::Mul: {
1488 bool TypesAreValidForPmull = 1488 bool TypesAreValidForPmull =
1489 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1489 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1490 bool InstructionSetIsValidForPmull = 1490 bool InstructionSetIsValidForPmull =
1491 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; 1491 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
1492 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1492 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1493 Variable *T = makeReg(Dest->getType()); 1493 Variable *T = makeReg(Dest->getType());
1494 _movp(T, Src0); 1494 _movp(T, Src0);
1495 _pmull(T, Src1); 1495 _pmull(T, Src0 == Src1 ? T : Src1);
1496 _movp(Dest, T); 1496 _movp(Dest, T);
1497 } else if (Dest->getType() == IceType_v4i32) { 1497 } else if (Dest->getType() == IceType_v4i32) {
1498 // Lowering sequence: 1498 // Lowering sequence:
1499 // Note: The mask arguments have index 0 on the left. 1499 // Note: The mask arguments have index 0 on the left.
1500 // 1500 //
1501 // movups T1, Src0 1501 // movups T1, Src0
1502 // pshufd T2, Src0, {1,0,3,0} 1502 // pshufd T2, Src0, {1,0,3,0}
1503 // pshufd T3, Src1, {1,0,3,0} 1503 // pshufd T3, Src1, {1,0,3,0}
1504 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1504 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1505 // pmuludq T1, Src1 1505 // pmuludq T1, Src1
(...skipping 19 matching lines...) Expand all
1525 Variable *T3 = makeReg(IceType_v4i32); 1525 Variable *T3 = makeReg(IceType_v4i32);
1526 Variable *T4 = makeReg(IceType_v4i32); 1526 Variable *T4 = makeReg(IceType_v4i32);
1527 _movp(T1, Src0); 1527 _movp(T1, Src0);
1528 _pshufd(T2, Src0, Mask1030); 1528 _pshufd(T2, Src0, Mask1030);
1529 _pshufd(T3, Src1, Mask1030); 1529 _pshufd(T3, Src1, Mask1030);
1530 _pmuludq(T1, Src1); 1530 _pmuludq(T1, Src1);
1531 _pmuludq(T2, T3); 1531 _pmuludq(T2, T3);
1532 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1532 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1533 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); 1533 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1534 _movp(Dest, T4); 1534 _movp(Dest, T4);
1535 } else if (Dest->getType() == IceType_v16i8) {
1536 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1535 } else { 1537 } else {
1536 assert(Dest->getType() == IceType_v16i8); 1538 llvm::report_fatal_error("Invalid vector multiply type");
1537 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1538 } 1539 }
1539 } break; 1540 } break;
1540 case InstArithmetic::Shl: 1541 case InstArithmetic::Shl:
1541 case InstArithmetic::Lshr: 1542 case InstArithmetic::Lshr:
1542 case InstArithmetic::Ashr: 1543 case InstArithmetic::Ashr:
1543 case InstArithmetic::Udiv: 1544 case InstArithmetic::Udiv:
1544 case InstArithmetic::Urem: 1545 case InstArithmetic::Urem:
1545 case InstArithmetic::Sdiv: 1546 case InstArithmetic::Sdiv:
1546 case InstArithmetic::Srem: 1547 case InstArithmetic::Srem:
1547 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1548 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1548 break; 1549 break;
1549 case InstArithmetic::Fadd: { 1550 case InstArithmetic::Fadd: {
1550 Variable *T = makeReg(Dest->getType()); 1551 Variable *T = makeReg(Dest->getType());
1551 _movp(T, Src0); 1552 _movp(T, Src0);
1552 _addps(T, Src1); 1553 _addps(T, Src1);
1553 _movp(Dest, T); 1554 _movp(Dest, T);
1554 } break; 1555 } break;
1555 case InstArithmetic::Fsub: { 1556 case InstArithmetic::Fsub: {
1556 Variable *T = makeReg(Dest->getType()); 1557 Variable *T = makeReg(Dest->getType());
1557 _movp(T, Src0); 1558 _movp(T, Src0);
1558 _subps(T, Src1); 1559 _subps(T, Src1);
1559 _movp(Dest, T); 1560 _movp(Dest, T);
1560 } break; 1561 } break;
1561 case InstArithmetic::Fmul: { 1562 case InstArithmetic::Fmul: {
1562 Variable *T = makeReg(Dest->getType()); 1563 Variable *T = makeReg(Dest->getType());
1563 _movp(T, Src0); 1564 _movp(T, Src0);
1564 _mulps(T, Src1); 1565 _mulps(T, Src0 == Src1 ? T : Src1);
1565 _movp(Dest, T); 1566 _movp(Dest, T);
1566 } break; 1567 } break;
1567 case InstArithmetic::Fdiv: { 1568 case InstArithmetic::Fdiv: {
1568 Variable *T = makeReg(Dest->getType()); 1569 Variable *T = makeReg(Dest->getType());
1569 _movp(T, Src0); 1570 _movp(T, Src0);
1570 _divps(T, Src1); 1571 _divps(T, Src1);
1571 _movp(Dest, T); 1572 _movp(Dest, T);
1572 } break; 1573 } break;
1573 case InstArithmetic::Frem: 1574 case InstArithmetic::Frem:
1574 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1575 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1613 return; 1614 return;
1614 } 1615 }
1615 // The 8-bit version of imul only allows the form "imul r/m8" where T must 1616 // The 8-bit version of imul only allows the form "imul r/m8" where T must
1616 // be in eax. 1617 // be in eax.
1617 if (isByteSizedArithType(Dest->getType())) { 1618 if (isByteSizedArithType(Dest->getType())) {
1618 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1619 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1619 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1620 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1620 } else { 1621 } else {
1621 _mov(T, Src0); 1622 _mov(T, Src0);
1622 } 1623 }
1623 _imul(T, Src1); 1624 _imul(T, Src0 == Src1 ? T : Src1);
1624 _mov(Dest, T); 1625 _mov(Dest, T);
1625 break; 1626 break;
1626 case InstArithmetic::Shl: 1627 case InstArithmetic::Shl:
1627 _mov(T, Src0); 1628 _mov(T, Src0);
1628 if (!llvm::isa<ConstantInteger32>(Src1)) 1629 if (!llvm::isa<ConstantInteger32>(Src1))
1629 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1630 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
1630 _shl(T, Src1); 1631 _shl(T, Src1);
1631 _mov(Dest, T); 1632 _mov(Dest, T);
1632 break; 1633 break;
1633 case InstArithmetic::Lshr: 1634 case InstArithmetic::Lshr:
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
1819 _addss(T, Src1); 1820 _addss(T, Src1);
1820 _mov(Dest, T); 1821 _mov(Dest, T);
1821 break; 1822 break;
1822 case InstArithmetic::Fsub: 1823 case InstArithmetic::Fsub:
1823 _mov(T, Src0); 1824 _mov(T, Src0);
1824 _subss(T, Src1); 1825 _subss(T, Src1);
1825 _mov(Dest, T); 1826 _mov(Dest, T);
1826 break; 1827 break;
1827 case InstArithmetic::Fmul: 1828 case InstArithmetic::Fmul:
1828 _mov(T, Src0); 1829 _mov(T, Src0);
1829 _mulss(T, Src1); 1830 _mulss(T, Src0 == Src1 ? T : Src1);
1830 _mov(Dest, T); 1831 _mov(Dest, T);
1831 break; 1832 break;
1832 case InstArithmetic::Fdiv: 1833 case InstArithmetic::Fdiv:
1833 _mov(T, Src0); 1834 _mov(T, Src0);
1834 _divss(T, Src1); 1835 _divss(T, Src1);
1835 _mov(Dest, T); 1836 _mov(Dest, T);
1836 break; 1837 break;
1837 case InstArithmetic::Frem: { 1838 case InstArithmetic::Frem: {
1838 const SizeT MaxSrcs = 2; 1839 const SizeT MaxSrcs = 2;
1839 Type Ty = Dest->getType(); 1840 Type Ty = Dest->getType();
(...skipping 3655 matching lines...) Expand 10 before | Expand all | Expand 10 after
5495 } 5496 }
5496 // the offset is not eligible for blinding or pooling, return the original 5497 // the offset is not eligible for blinding or pooling, return the original
5497 // mem operand 5498 // mem operand
5498 return MemOperand; 5499 return MemOperand;
5499 } 5500 }
5500 5501
5501 } // end of namespace X86Internal 5502 } // end of namespace X86Internal
5502 } // end of namespace Ice 5503 } // end of namespace Ice
5503 5504
5504 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « no previous file | tests_lit/llvm2ice_tests/square.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698