Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)
Patch Set: Fix MIPS. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/arm/macro-assembler-arm.h" 7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h" 8 #include "src/assembler-inl.h"
9 #include "src/compilation-info.h" 9 #include "src/compilation-info.h"
10 #include "src/compiler/code-generator-impl.h" 10 #include "src/compiler/code-generator-impl.h"
(...skipping 1593 matching lines...) Expand 10 before | Expand all | Expand 10 after
1604 } 1604 }
1605 case kArmF32x4RecipSqrtApprox: { 1605 case kArmF32x4RecipSqrtApprox: {
1606 __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0)); 1606 __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
1607 break; 1607 break;
1608 } 1608 }
1609 case kArmF32x4Add: { 1609 case kArmF32x4Add: {
1610 __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0), 1610 __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1611 i.InputSimd128Register(1)); 1611 i.InputSimd128Register(1));
1612 break; 1612 break;
1613 } 1613 }
1614 case kArmF32x4AddHoriz: {
1615 Simd128Register dst = i.OutputSimd128Register(),
1616 src0 = i.InputSimd128Register(0),
1617 src1 = i.InputSimd128Register(1);
1618 // Make sure we don't overwrite source data before it's used.
1619 if (dst.is(src0)) {
1620 __ vpadd(dst.low(), src0.low(), src0.high());
georgia.kouveli 2017/04/20 14:53:06 The inputs to all the vpadd instructions seem to b
bbudge 2017/04/21 20:18:58 I changed the tests to use the permutation test ma
1621 if (dst.is(src1)) {
1622 __ vmov(dst.high(), dst.low());
1623 } else {
1624 __ vpadd(dst.high(), src1.low(), src1.high());
1625 }
1626 } else {
1627 __ vpadd(dst.high(), src1.low(), src1.high());
1628 __ vpadd(dst.low(), src0.low(), src0.high());
1629 }
1630 break;
1631 }
1614 case kArmF32x4Sub: { 1632 case kArmF32x4Sub: {
1615 __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0), 1633 __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
1616 i.InputSimd128Register(1)); 1634 i.InputSimd128Register(1));
1617 break; 1635 break;
1618 } 1636 }
1619 case kArmF32x4Mul: { 1637 case kArmF32x4Mul: {
1620 __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0), 1638 __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1621 i.InputSimd128Register(1)); 1639 i.InputSimd128Register(1));
1622 break; 1640 break;
1623 } 1641 }
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
1702 case kArmI32x4ShrS: { 1720 case kArmI32x4ShrS: {
1703 __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1721 __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1704 i.InputInt5(1)); 1722 i.InputInt5(1));
1705 break; 1723 break;
1706 } 1724 }
1707 case kArmI32x4Add: { 1725 case kArmI32x4Add: {
1708 __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1726 __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1709 i.InputSimd128Register(1)); 1727 i.InputSimd128Register(1));
1710 break; 1728 break;
1711 } 1729 }
1730 case kArmI32x4AddHoriz: {
1731 Simd128Register dst = i.OutputSimd128Register(),
1732 src0 = i.InputSimd128Register(0),
1733 src1 = i.InputSimd128Register(1);
1734 // Make sure we don't overwrite source data before it's used.
georgia.kouveli 2017/04/20 14:53:06 Might make sense to factor out this code, since it
bbudge 2017/04/21 20:18:58 Done. (And I did the same for the repetitious narr
1735 if (dst.is(src0)) {
1736 __ vpadd(Neon32, dst.low(), src0.low(), src0.high());
1737 if (dst.is(src1)) {
1738 __ vmov(dst.high(), dst.low());
1739 } else {
1740 __ vpadd(Neon32, dst.high(), src1.low(), src1.high());
1741 }
1742 } else {
1743 __ vpadd(Neon32, dst.high(), src1.low(), src1.high());
1744 __ vpadd(Neon32, dst.low(), src0.low(), src0.high());
1745 }
1746 break;
1747 }
1712 case kArmI32x4Sub: { 1748 case kArmI32x4Sub: {
1713 __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1749 __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1714 i.InputSimd128Register(1)); 1750 i.InputSimd128Register(1));
1715 break; 1751 break;
1716 } 1752 }
1717 case kArmI32x4Mul: { 1753 case kArmI32x4Mul: {
1718 __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), 1754 __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1719 i.InputSimd128Register(1)); 1755 i.InputSimd128Register(1));
1720 break; 1756 break;
1721 } 1757 }
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
1850 case kArmI16x8Add: { 1886 case kArmI16x8Add: {
1851 __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1887 __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1852 i.InputSimd128Register(1)); 1888 i.InputSimd128Register(1));
1853 break; 1889 break;
1854 } 1890 }
1855 case kArmI16x8AddSaturateS: { 1891 case kArmI16x8AddSaturateS: {
1856 __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1892 __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1857 i.InputSimd128Register(1)); 1893 i.InputSimd128Register(1));
1858 break; 1894 break;
1859 } 1895 }
1896 case kArmI16x8AddHoriz: {
1897 Simd128Register dst = i.OutputSimd128Register(),
1898 src0 = i.InputSimd128Register(0),
1899 src1 = i.InputSimd128Register(1);
1900 // Make sure we don't overwrite source data before it's used.
1901 if (dst.is(src0)) {
1902 __ vpadd(Neon16, dst.low(), src0.low(), src0.high());
1903 if (dst.is(src1)) {
1904 __ vmov(dst.high(), dst.low());
1905 } else {
1906 __ vpadd(Neon16, dst.high(), src1.low(), src1.high());
1907 }
1908 } else {
1909 __ vpadd(Neon16, dst.high(), src1.low(), src1.high());
1910 __ vpadd(Neon16, dst.low(), src0.low(), src0.high());
1911 }
1912 break;
1913 }
1860 case kArmI16x8Sub: { 1914 case kArmI16x8Sub: {
1861 __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1915 __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1862 i.InputSimd128Register(1)); 1916 i.InputSimd128Register(1));
1863 break; 1917 break;
1864 } 1918 }
1865 case kArmI16x8SubSaturateS: { 1919 case kArmI16x8SubSaturateS: {
1866 __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), 1920 __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
1867 i.InputSimd128Register(1)); 1921 i.InputSimd128Register(1));
1868 break; 1922 break;
1869 } 1923 }
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
2018 case kArmI8x16Add: { 2072 case kArmI8x16Add: {
2019 __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2073 __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2020 i.InputSimd128Register(1)); 2074 i.InputSimd128Register(1));
2021 break; 2075 break;
2022 } 2076 }
2023 case kArmI8x16AddSaturateS: { 2077 case kArmI8x16AddSaturateS: {
2024 __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2078 __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2025 i.InputSimd128Register(1)); 2079 i.InputSimd128Register(1));
2026 break; 2080 break;
2027 } 2081 }
2082 case kArmI8x16AddHoriz: {
2083 Simd128Register dst = i.OutputSimd128Register(),
2084 src0 = i.InputSimd128Register(0),
2085 src1 = i.InputSimd128Register(1);
2086 // Make sure we don't overwrite source data before it's used.
2087 if (dst.is(src0)) {
2088 __ vpadd(Neon8, dst.low(), src0.low(), src0.high());
2089 if (dst.is(src1)) {
2090 __ vmov(dst.high(), dst.low());
2091 } else {
2092 __ vpadd(Neon8, dst.high(), src1.low(), src1.high());
2093 }
2094 } else {
2095 __ vpadd(Neon8, dst.high(), src1.low(), src1.high());
2096 __ vpadd(Neon8, dst.low(), src0.low(), src0.high());
2097 }
2098 break;
2099 }
2028 case kArmI8x16Sub: { 2100 case kArmI8x16Sub: {
2029 __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2101 __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2030 i.InputSimd128Register(1)); 2102 i.InputSimd128Register(1));
2031 break; 2103 break;
2032 } 2104 }
2033 case kArmI8x16SubSaturateS: { 2105 case kArmI8x16SubSaturateS: {
2034 __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), 2106 __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2035 i.InputSimd128Register(1)); 2107 i.InputSimd128Register(1));
2036 break; 2108 break;
2037 } 2109 }
(...skipping 1124 matching lines...) Expand 10 before | Expand all | Expand 10 after
3162 padding_size -= v8::internal::Assembler::kInstrSize; 3234 padding_size -= v8::internal::Assembler::kInstrSize;
3163 } 3235 }
3164 } 3236 }
3165 } 3237 }
3166 3238
3167 #undef __ 3239 #undef __
3168 3240
3169 } // namespace compiler 3241 } // namespace compiler
3170 } // namespace internal 3242 } // namespace internal
3171 } // namespace v8 3243 } // namespace v8
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698