Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" |
| 6 | 6 |
| 7 #include "src/arm/macro-assembler-arm.h" | 7 #include "src/arm/macro-assembler-arm.h" |
| 8 #include "src/assembler-inl.h" | 8 #include "src/assembler-inl.h" |
| 9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" |
| 10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" |
| (...skipping 2010 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2022 break; | 2022 break; |
| 2023 } | 2023 } |
| 2024 case kArmS128Select: { | 2024 case kArmS128Select: { |
| 2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. | 2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. |
| 2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); | 2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); |
| 2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), | 2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), |
| 2028 i.InputSimd128Register(2)); | 2028 i.InputSimd128Register(2)); |
| 2029 break; | 2029 break; |
| 2030 } | 2030 } |
| 2031 case kArmS32x4ZipLeft: { | |
| 2032 Simd128Register dst = i.OutputSimd128Register(), | |
| 2033 src1 = i.InputSimd128Register(1); | |
| 2034 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2035 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] | |
| 2036 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5] | |
| 2037 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5] | |
| 2038 break; | |
| 2039 } | |
| 2040 case kArmS32x4ZipRight: { | |
| 2041 Simd128Register dst = i.OutputSimd128Register(), | |
| 2042 src1 = i.InputSimd128Register(1); | |
| 2043 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2044 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft). | |
| 2045 __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7] | |
| 2046 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7] | |
| 2047 break; | |
| 2048 } | |
| 2049 case kArmS32x4UnzipLeft: { | |
|
martyn.capewell (2017/04/13 17:07:19): I think the unzip cases can still be replaced with … [review comment truncated in this capture]
| |
| 2050 Simd128Register dst = i.OutputSimd128Register(), | |
| 2051 src1 = i.InputSimd128Register(1); | |
| 2052 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2053 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] | |
| 2054 __ vmov(kScratchQuadReg, src1); | |
| 2055 __ vuzp(Neon32, dst, kScratchQuadReg); // dst = [0, 2, 4, 6] | |
| 2056 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2057 break; | |
| 2058 } | |
| 2059 case kArmS32x4UnzipRight: { | |
|
martyn.capewell (2017/04/13 17:07:19): `if (dst.is(src0) && dst.is(src1)) { __ vshrn(dst` … [review comment truncated in this capture]
| |
| 2060 Simd128Register dst = i.OutputSimd128Register(), | |
| 2061 src1 = i.InputSimd128Register(1); | |
| 2062 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2063 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft). | |
| 2064 __ vmov(kScratchQuadReg, src1); | |
| 2065 __ vuzp(Neon32, kScratchQuadReg, dst); // dst = [1, 3, 5, 7] | |
| 2066 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2067 break; | |
| 2068 } | |
| 2069 case kArmS32x4TransposeLeft: { | |
| 2070 Simd128Register dst = i.OutputSimd128Register(), | |
| 2071 src1 = i.InputSimd128Register(1); | |
| 2072 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2073 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] | |
| 2074 __ vmov(kScratchQuadReg, src1); | |
| 2075 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] | |
| 2076 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2077 break; | |
| 2078 } | |
| 2079 case kArmS32x4TransposeRight: { | |
| 2080 Simd128Register dst = i.OutputSimd128Register(), | |
| 2081 src1 = i.InputSimd128Register(1); | |
| 2082 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2083 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). | |
| 2084 __ vmov(kScratchQuadReg, src1); | |
| 2085 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] | |
| 2086 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2087 break; | |
| 2088 } | |
| 2089 case kArmS16x8ZipLeft: { | |
| 2090 Simd128Register dst = i.OutputSimd128Register(), | |
| 2091 src1 = i.InputSimd128Register(1); | |
| 2092 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] | |
| 2093 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2094 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11] | |
| 2095 __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11] | |
| 2096 break; | |
| 2097 } | |
| 2098 case kArmS16x8ZipRight: { | |
| 2099 Simd128Register dst = i.OutputSimd128Register(), | |
| 2100 src1 = i.InputSimd128Register(1); | |
| 2101 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2102 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). | |
| 2103 __ vmov(dst.low(), src1.high()); | |
| 2104 __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15] | |
| 2105 break; | |
| 2106 } | |
| 2107 case kArmS16x8UnzipLeft: { | |
| 2108 Simd128Register dst = i.OutputSimd128Register(), | |
| 2109 src1 = i.InputSimd128Register(1); | |
| 2110 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2111 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] | |
| 2112 __ vmov(kScratchQuadReg, src1); | |
| 2113 __ vuzp(Neon16, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 14] | |
| 2114 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2115 break; | |
| 2116 } | |
| 2117 case kArmS16x8UnzipRight: { | |
| 2118 Simd128Register dst = i.OutputSimd128Register(), | |
| 2119 src1 = i.InputSimd128Register(1); | |
| 2120 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2121 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). | |
| 2122 __ vmov(kScratchQuadReg, src1); | |
| 2123 __ vuzp(Neon16, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 15] | |
| 2124 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2125 break; | |
| 2126 } | |
| 2127 case kArmS16x8TransposeLeft: { | |
| 2128 Simd128Register dst = i.OutputSimd128Register(), | |
| 2129 src1 = i.InputSimd128Register(1); | |
| 2130 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2131 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] | |
| 2132 __ vmov(kScratchQuadReg, src1); | |
| 2133 __ vtrn(Neon16, dst, kScratchQuadReg); // dst = [0, 8, 2, 10, ... 14] | |
| 2134 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2135 break; | |
| 2136 } | |
| 2137 case kArmS16x8TransposeRight: { | |
| 2138 Simd128Register dst = i.OutputSimd128Register(), | |
| 2139 src1 = i.InputSimd128Register(1); | |
| 2140 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2141 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). | |
| 2142 __ vmov(kScratchQuadReg, src1); | |
| 2143 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] | |
| 2144 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2145 break; | |
| 2146 } | |
| 2147 case kArmS8x16ZipLeft: { | |
| 2148 Simd128Register dst = i.OutputSimd128Register(), | |
| 2149 src1 = i.InputSimd128Register(1); | |
| 2150 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2151 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] | |
| 2152 __ vmov(dst.high(), src1.low()); | |
| 2153 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] | |
| 2154 break; | |
| 2155 } | |
| 2156 case kArmS8x16ZipRight: { | |
| 2157 Simd128Register dst = i.OutputSimd128Register(), | |
| 2158 src1 = i.InputSimd128Register(1); | |
| 2159 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2160 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). | |
| 2161 __ vmov(dst.low(), src1.high()); | |
| 2162 __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31] | |
| 2163 break; | |
| 2164 } | |
| 2165 case kArmS8x16UnzipLeft: { | |
| 2166 Simd128Register dst = i.OutputSimd128Register(), | |
| 2167 src1 = i.InputSimd128Register(1); | |
| 2168 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2169 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] | |
| 2170 __ vmov(kScratchQuadReg, src1); | |
| 2171 __ vuzp(Neon8, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 30] | |
| 2172 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2173 break; | |
| 2174 } | |
| 2175 case kArmS8x16UnzipRight: { | |
| 2176 Simd128Register dst = i.OutputSimd128Register(), | |
| 2177 src1 = i.InputSimd128Register(1); | |
| 2178 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2179 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). | |
| 2180 __ vmov(kScratchQuadReg, src1); | |
| 2181 __ vuzp(Neon8, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 31] | |
| 2182 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2183 break; | |
| 2184 } | |
| 2185 case kArmS8x16TransposeLeft: { | |
| 2186 Simd128Register dst = i.OutputSimd128Register(), | |
| 2187 src1 = i.InputSimd128Register(1); | |
| 2188 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2189 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] | |
| 2190 __ vmov(kScratchQuadReg, src1); | |
| 2191 __ vtrn(Neon8, dst, kScratchQuadReg); // dst = [0, 16, 2, 18, ... 30] | |
| 2192 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2193 break; | |
| 2194 } | |
| 2195 case kArmS8x16TransposeRight: { | |
| 2196 Simd128Register dst = i.OutputSimd128Register(), | |
| 2197 src1 = i.InputSimd128Register(1); | |
| 2198 DCHECK(dst.is(i.InputSimd128Register(0))); | |
| 2199 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). | |
| 2200 __ vmov(kScratchQuadReg, src1); | |
| 2201 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] | |
| 2202 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); | |
| 2203 break; | |
| 2204 } | |
| 2205 case kArmS8x16Concat: { | |
| 2206 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), | |
| 2207 i.InputSimd128Register(1), i.InputInt4(2)); | |
| 2208 break; | |
| 2209 } | |
| 2210 case kArmS32x2Reverse: { | |
| 2211 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
| 2212 break; | |
| 2213 } | |
| 2214 case kArmS16x4Reverse: { | |
| 2215 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
| 2216 break; | |
| 2217 } | |
| 2218 case kArmS16x2Reverse: { | |
| 2219 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
| 2220 break; | |
| 2221 } | |
| 2222 case kArmS8x8Reverse: { | |
| 2223 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
| 2224 break; | |
| 2225 } | |
| 2226 case kArmS8x4Reverse: { | |
| 2227 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
| 2228 break; | |
| 2229 } | |
| 2230 case kArmS8x2Reverse: { | |
| 2231 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); | |
| 2232 break; | |
| 2233 } | |
| 2031 case kArmS1x4AnyTrue: { | 2234 case kArmS1x4AnyTrue: { |
| 2032 const QwNeonRegister& src = i.InputSimd128Register(0); | 2235 const QwNeonRegister& src = i.InputSimd128Register(0); |
| 2033 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); | 2236 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); |
| 2034 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, | 2237 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, |
| 2035 kScratchDoubleReg); | 2238 kScratchDoubleReg); |
| 2036 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); | 2239 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); |
| 2037 break; | 2240 break; |
| 2038 } | 2241 } |
| 2039 case kArmS1x4AllTrue: { | 2242 case kArmS1x4AllTrue: { |
| 2040 const QwNeonRegister& src = i.InputSimd128Register(0); | 2243 const QwNeonRegister& src = i.InputSimd128Register(0); |
| (...skipping 790 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2831 padding_size -= v8::internal::Assembler::kInstrSize; | 3034 padding_size -= v8::internal::Assembler::kInstrSize; |
| 2832 } | 3035 } |
| 2833 } | 3036 } |
| 2834 } | 3037 } |
| 2835 | 3038 |
| 2836 #undef __ | 3039 #undef __ |
| 2837 | 3040 |
| 2838 } // namespace compiler | 3041 } // namespace compiler |
| 2839 } // namespace internal | 3042 } // namespace internal |
| 2840 } // namespace v8 | 3043 } // namespace v8 |
| OLD | NEW |