Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(208)

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)
Patch Set: Rebase, reformat. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/arm/macro-assembler-arm.h" 7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h" 8 #include "src/assembler-inl.h"
9 #include "src/compilation-info.h" 9 #include "src/compilation-info.h"
10 #include "src/compiler/code-generator-impl.h" 10 #include "src/compiler/code-generator-impl.h"
(...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after
489 FrameScope scope(masm(), StackFrame::MANUAL); \ 489 FrameScope scope(masm(), StackFrame::MANUAL); \
490 __ PrepareCallCFunction(0, 1, kScratchReg); \ 490 __ PrepareCallCFunction(0, 1, kScratchReg); \
491 __ MovToFloatParameter(i.InputDoubleRegister(0)); \ 491 __ MovToFloatParameter(i.InputDoubleRegister(0)); \
492 __ CallCFunction(ExternalReference::ieee754_##name##_function(isolate()), \ 492 __ CallCFunction(ExternalReference::ieee754_##name##_function(isolate()), \
493 0, 1); \ 493 0, 1); \
494 /* Move the result in the double result register. */ \ 494 /* Move the result in the double result register. */ \
495 __ MovFromFloatResult(i.OutputDoubleRegister()); \ 495 __ MovFromFloatResult(i.OutputDoubleRegister()); \
496 DCHECK_EQ(LeaveCC, i.OutputSBit()); \ 496 DCHECK_EQ(LeaveCC, i.OutputSBit()); \
497 } while (0) 497 } while (0)
498 498
499 #define ASSEMBLE_NEON_NARROWING_OP(dt) \
500 do { \
501 Simd128Register dst = i.OutputSimd128Register(), \
502 src0 = i.InputSimd128Register(0), \
503 src1 = i.InputSimd128Register(1); \
504 if (dst.is(src0) && dst.is(src1)) { \
505 __ vqmovn(dt, dst.low(), src0); \
506 __ vmov(dst.high(), dst.low()); \
507 } else if (dst.is(src0)) { \
508 __ vqmovn(dt, dst.low(), src0); \
509 __ vqmovn(dt, dst.high(), src1); \
510 } else { \
511 __ vqmovn(dt, dst.high(), src1); \
512 __ vqmovn(dt, dst.low(), src0); \
513 } \
514 } while (0)
515
516 #define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \
517 do { \
518 Simd128Register dst = i.OutputSimd128Register(), \
519 src0 = i.InputSimd128Register(0), \
520 src1 = i.InputSimd128Register(1); \
521 if (dst.is(src0)) { \
522 __ op(size, dst.low(), src0.low(), src0.high()); \
523 if (dst.is(src1)) { \
524 __ vmov(dst.high(), dst.low()); \
525 } else { \
526 __ op(size, dst.high(), src1.low(), src1.high()); \
527 } \
528 } else { \
529 __ op(size, dst.high(), src1.low(), src1.high()); \
530 __ op(size, dst.low(), src0.low(), src0.high()); \
531 } \
532 } while (0)
533
499 void CodeGenerator::AssembleDeconstructFrame() { 534 void CodeGenerator::AssembleDeconstructFrame() {
500 __ LeaveFrame(StackFrame::MANUAL); 535 __ LeaveFrame(StackFrame::MANUAL);
501 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset()); 536 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
502 } 537 }
503 538
504 void CodeGenerator::AssemblePrepareTailCall() { 539 void CodeGenerator::AssemblePrepareTailCall() {
505 if (frame_access_state()->has_frame()) { 540 if (frame_access_state()->has_frame()) {
506 __ ldr(lr, MemOperand(fp, StandardFrameConstants::kCallerPCOffset)); 541 __ ldr(lr, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
507 __ ldr(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset)); 542 __ ldr(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
508 } 543 }
(...skipping 1095 matching lines...) Expand 10 before | Expand all | Expand 10 after
1604 } 1639 }
1605 case kArmF32x4RecipSqrtApprox: { 1640 case kArmF32x4RecipSqrtApprox: {
1606 __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0)); 1641 __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
1607 break; 1642 break;
1608 } 1643 }
1609 case kArmF32x4Add: { 1644 case kArmF32x4Add: {
1610 __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0), 1645 __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1611 i.InputSimd128Register(1)); 1646 i.InputSimd128Register(1));
1612 break; 1647 break;
1613 } 1648 }
1649 case kArmF32x4AddHoriz: {
1650 Simd128Register dst = i.OutputSimd128Register(),
1651 src0 = i.InputSimd128Register(0),
1652 src1 = i.InputSimd128Register(1);
1653 // Make sure we don't overwrite source data before it's used.
1654 if (dst.is(src0)) {
1655 __ vpadd(dst.low(), src0.low(), src0.high());
1656 if (dst.is(src1)) {
1657 __ vmov(dst.high(), dst.low());
1658 } else {
1659 __ vpadd(dst.high(), src1.low(), src1.high());
1660 }
1661 } else {
1662 __ vpadd(dst.high(), src1.low(), src1.high());
1663 __ vpadd(dst.low(), src0.low(), src0.high());
1664 }
1665 break;
1666 }
1614 case kArmF32x4Sub: { 1667 case kArmF32x4Sub: {
1615 __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0), 1668 __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
1616 i.InputSimd128Register(1)); 1669 i.InputSimd128Register(1));
1617 break; 1670 break;
1618 } 1671 }
1619 case kArmF32x4Mul: { 1672 case kArmF32x4Mul: {
1620 __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0), 1673 __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1621 i.InputSimd128Register(1)); 1674 i.InputSimd128Register(1));
1622 break; 1675 break;
1623 } 1676 }
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
1692 case kArmI32x4ShrS: { 1745 case kArmI32x4ShrS: {
1693 __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1746 __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1694 i.InputInt5(1)); 1747 i.InputInt5(1));
1695 break; 1748 break;
1696 } 1749 }
1697 case kArmI32x4Add: { 1750 case kArmI32x4Add: {
1698 __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1751 __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1699 i.InputSimd128Register(1)); 1752 i.InputSimd128Register(1));
1700 break; 1753 break;
1701 } 1754 }
1755 case kArmI32x4AddHoriz:
1756 ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
1757 break;
1702 case kArmI32x4Sub: { 1758 case kArmI32x4Sub: {
1703 __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1759 __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1704 i.InputSimd128Register(1)); 1760 i.InputSimd128Register(1));
1705 break; 1761 break;
1706 } 1762 }
1707 case kArmI32x4Mul: { 1763 case kArmI32x4Mul: {
1708 __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1764 __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1709 i.InputSimd128Register(1)); 1765 i.InputSimd128Register(1));
1710 break; 1766 break;
1711 } 1767 }
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
1811 case kArmI16x8Shl: { 1867 case kArmI16x8Shl: {
1812 __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1868 __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1813 i.InputInt4(1)); 1869 i.InputInt4(1));
1814 break; 1870 break;
1815 } 1871 }
1816 case kArmI16x8ShrS: { 1872 case kArmI16x8ShrS: {
1817 __ vshr(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1873 __ vshr(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1818 i.InputInt4(1)); 1874 i.InputInt4(1));
1819 break; 1875 break;
1820 } 1876 }
1821 case kArmI16x8SConvertI32x4: { 1877 case kArmI16x8SConvertI32x4:
1822 Simd128Register dst = i.OutputSimd128Register(), 1878 ASSEMBLE_NEON_NARROWING_OP(NeonS16);
1823 src0 = i.InputSimd128Register(0),
1824 src1 = i.InputSimd128Register(1);
1825 // Take care not to overwrite a source register before it's used.
1826 if (dst.is(src0) && dst.is(src1)) {
1827 __ vqmovn(NeonS16, dst.low(), src0);
1828 __ vmov(dst.high(), dst.low());
1829 } else if (dst.is(src0)) {
1830 // dst is src0, so narrow src0 first.
1831 __ vqmovn(NeonS16, dst.low(), src0);
1832 __ vqmovn(NeonS16, dst.high(), src1);
1833 } else {
1834 // dst may alias src1, so narrow src1 first.
1835 __ vqmovn(NeonS16, dst.high(), src1);
1836 __ vqmovn(NeonS16, dst.low(), src0);
1837 }
1838 break; 1879 break;
1839 }
1840 case kArmI16x8Add: { 1880 case kArmI16x8Add: {
1841 __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1881 __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1842 i.InputSimd128Register(1)); 1882 i.InputSimd128Register(1));
1843 break; 1883 break;
1844 } 1884 }
1845 case kArmI16x8AddSaturateS: { 1885 case kArmI16x8AddSaturateS: {
1846 __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1886 __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1847 i.InputSimd128Register(1)); 1887 i.InputSimd128Register(1));
1848 break; 1888 break;
1849 } 1889 }
1890 case kArmI16x8AddHoriz:
1891 ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
1892 break;
1850 case kArmI16x8Sub: { 1893 case kArmI16x8Sub: {
1851 __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1894 __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1852 i.InputSimd128Register(1)); 1895 i.InputSimd128Register(1));
1853 break; 1896 break;
1854 } 1897 }
1855 case kArmI16x8SubSaturateS: { 1898 case kArmI16x8SubSaturateS: {
1856 __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1899 __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1857 i.InputSimd128Register(1)); 1900 i.InputSimd128Register(1));
1858 break; 1901 break;
1859 } 1902 }
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
1902 case kArmI16x8UConvertI8x16High: { 1945 case kArmI16x8UConvertI8x16High: {
1903 __ vmovl(NeonU8, i.OutputSimd128Register(), 1946 __ vmovl(NeonU8, i.OutputSimd128Register(),
1904 i.InputSimd128Register(0).high()); 1947 i.InputSimd128Register(0).high());
1905 break; 1948 break;
1906 } 1949 }
1907 case kArmI16x8ShrU: { 1950 case kArmI16x8ShrU: {
1908 __ vshr(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1951 __ vshr(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1909 i.InputInt4(1)); 1952 i.InputInt4(1));
1910 break; 1953 break;
1911 } 1954 }
1912 case kArmI16x8UConvertI32x4: { 1955 case kArmI16x8UConvertI32x4:
1913 Simd128Register dst = i.OutputSimd128Register(), 1956 ASSEMBLE_NEON_NARROWING_OP(NeonU16);
1914 src0 = i.InputSimd128Register(0),
1915 src1 = i.InputSimd128Register(1);
1916 // Take care not to overwrite a source register before it's used.
1917 if (dst.is(src0) && dst.is(src1)) {
1918 __ vqmovn(NeonU16, dst.low(), src0);
1919 __ vmov(dst.high(), dst.low());
1920 } else if (dst.is(src0)) {
1921 // dst is src0, so narrow src0 first.
1922 __ vqmovn(NeonU16, dst.low(), src0);
1923 __ vqmovn(NeonU16, dst.high(), src1);
1924 } else {
1925 // dst may alias src1, so narrow src1 first.
1926 __ vqmovn(NeonU16, dst.high(), src1);
1927 __ vqmovn(NeonU16, dst.low(), src0);
1928 }
1929 break; 1957 break;
1930 }
1931 case kArmI16x8AddSaturateU: { 1958 case kArmI16x8AddSaturateU: {
1932 __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1959 __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1933 i.InputSimd128Register(1)); 1960 i.InputSimd128Register(1));
1934 break; 1961 break;
1935 } 1962 }
1936 case kArmI16x8SubSaturateU: { 1963 case kArmI16x8SubSaturateU: {
1937 __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1964 __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1938 i.InputSimd128Register(1)); 1965 i.InputSimd128Register(1));
1939 break; 1966 break;
1940 } 1967 }
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1979 case kArmI8x16Shl: { 2006 case kArmI8x16Shl: {
1980 __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2007 __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
1981 i.InputInt3(1)); 2008 i.InputInt3(1));
1982 break; 2009 break;
1983 } 2010 }
1984 case kArmI8x16ShrS: { 2011 case kArmI8x16ShrS: {
1985 __ vshr(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2012 __ vshr(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
1986 i.InputInt3(1)); 2013 i.InputInt3(1));
1987 break; 2014 break;
1988 } 2015 }
1989 case kArmI8x16SConvertI16x8: { 2016 case kArmI8x16SConvertI16x8:
1990 Simd128Register dst = i.OutputSimd128Register(), 2017 ASSEMBLE_NEON_NARROWING_OP(NeonS8);
1991 src0 = i.InputSimd128Register(0),
1992 src1 = i.InputSimd128Register(1);
1993 // Take care not to overwrite a source register before it's used.
1994 if (dst.is(src0) && dst.is(src1)) {
1995 __ vqmovn(NeonS8, dst.low(), src0);
1996 __ vmov(dst.high(), dst.low());
1997 } else if (dst.is(src0)) {
1998 // dst is src0, so narrow src0 first.
1999 __ vqmovn(NeonS8, dst.low(), src0);
2000 __ vqmovn(NeonS8, dst.high(), src1);
2001 } else {
2002 // dst may alias src1, so narrow src1 first.
2003 __ vqmovn(NeonS8, dst.high(), src1);
2004 __ vqmovn(NeonS8, dst.low(), src0);
2005 }
2006 break; 2018 break;
2007 }
2008 case kArmI8x16Add: { 2019 case kArmI8x16Add: {
2009 __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2020 __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2010 i.InputSimd128Register(1)); 2021 i.InputSimd128Register(1));
2011 break; 2022 break;
2012 } 2023 }
2013 case kArmI8x16AddSaturateS: { 2024 case kArmI8x16AddSaturateS: {
2014 __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2025 __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2015 i.InputSimd128Register(1)); 2026 i.InputSimd128Register(1));
2016 break; 2027 break;
2017 } 2028 }
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2059 case kArmI8x16LeS: { 2070 case kArmI8x16LeS: {
2060 __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(1), 2071 __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(1),
2061 i.InputSimd128Register(0)); 2072 i.InputSimd128Register(0));
2062 break; 2073 break;
2063 } 2074 }
2064 case kArmI8x16ShrU: { 2075 case kArmI8x16ShrU: {
2065 __ vshr(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2076 __ vshr(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2066 i.InputInt3(1)); 2077 i.InputInt3(1));
2067 break; 2078 break;
2068 } 2079 }
2069 case kArmI8x16UConvertI16x8: { 2080 case kArmI8x16UConvertI16x8:
2070 Simd128Register dst = i.OutputSimd128Register(), 2081 ASSEMBLE_NEON_NARROWING_OP(NeonU8);
2071 src0 = i.InputSimd128Register(0),
2072 src1 = i.InputSimd128Register(1);
2073 // Take care not to overwrite a source register before it's used.
2074 if (dst.is(src0) && dst.is(src1)) {
2075 __ vqmovn(NeonU8, dst.low(), src0);
2076 __ vmov(dst.high(), dst.low());
2077 } else if (dst.is(src0)) {
2078 // dst is src0, so narrow src0 first.
2079 __ vqmovn(NeonU8, dst.low(), src0);
2080 __ vqmovn(NeonU8, dst.high(), src1);
2081 } else {
2082 // dst may alias src1, so narrow src1 first.
2083 __ vqmovn(NeonU8, dst.high(), src1);
2084 __ vqmovn(NeonU8, dst.low(), src0);
2085 }
2086 break; 2082 break;
2087 }
2088 case kArmI8x16AddSaturateU: { 2083 case kArmI8x16AddSaturateU: {
2089 __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2084 __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2090 i.InputSimd128Register(1)); 2085 i.InputSimd128Register(1));
2091 break; 2086 break;
2092 } 2087 }
2093 case kArmI8x16SubSaturateU: { 2088 case kArmI8x16SubSaturateU: {
2094 __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2089 __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2095 i.InputSimd128Register(1)); 2090 i.InputSimd128Register(1));
2096 break; 2091 break;
2097 } 2092 }
(...skipping 1054 matching lines...) Expand 10 before | Expand all | Expand 10 after
3152 padding_size -= v8::internal::Assembler::kInstrSize; 3147 padding_size -= v8::internal::Assembler::kInstrSize;
3153 } 3148 }
3154 } 3149 }
3155 } 3150 }
3156 3151
3157 #undef __ 3152 #undef __
3158 3153
3159 } // namespace compiler 3154 } // namespace compiler
3160 } // namespace internal 3155 } // namespace internal
3161 } // namespace v8 3156 } // namespace v8
OLDNEW
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698