OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" |
6 | 6 |
7 #include "src/arm/macro-assembler-arm.h" | 7 #include "src/arm/macro-assembler-arm.h" |
8 #include "src/assembler-inl.h" | 8 #include "src/assembler-inl.h" |
9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" |
10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" |
(...skipping 2138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2149 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2149 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
2150 break; | 2150 break; |
2151 } | 2151 } |
2152 case kArmS128Select: { | 2152 case kArmS128Select: { |
2153 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. | 2153 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. |
2154 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); | 2154 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); |
2155 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), | 2155 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), |
2156 i.InputSimd128Register(2)); | 2156 i.InputSimd128Register(2)); |
2157 break; | 2157 break; |
2158 } | 2158 } |
| 2159 case kArmS32x4ZipLeft: { |
| 2160 Simd128Register dst = i.OutputSimd128Register(), |
| 2161 src1 = i.InputSimd128Register(1); |
| 2162 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2163 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] |
| 2164 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5] |
| 2165 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5] |
| 2166 break; |
| 2167 } |
| 2168 case kArmS32x4ZipRight: { |
| 2169 Simd128Register dst = i.OutputSimd128Register(), |
| 2170 src1 = i.InputSimd128Register(1); |
| 2171 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2172 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft). |
| 2173 __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7] |
| 2174 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7] |
| 2175 break; |
| 2176 } |
| 2177 case kArmS32x4UnzipLeft: { |
| 2178 Simd128Register dst = i.OutputSimd128Register(), |
| 2179 src1 = i.InputSimd128Register(1); |
| 2180 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2181 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] |
| 2182 __ vmov(kScratchQuadReg, src1); |
| 2183 __ vuzp(Neon32, dst, kScratchQuadReg); // dst = [0, 2, 4, 6] |
| 2184 break; |
| 2185 } |
| 2186 case kArmS32x4UnzipRight: { |
| 2187 Simd128Register dst = i.OutputSimd128Register(), |
| 2188 src1 = i.InputSimd128Register(1); |
| 2189 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2190 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft). |
| 2191 __ vmov(kScratchQuadReg, src1); |
| 2192 __ vuzp(Neon32, kScratchQuadReg, dst); // dst = [1, 3, 5, 7] |
| 2193 break; |
| 2194 } |
| 2195 case kArmS32x4TransposeLeft: { |
| 2196 Simd128Register dst = i.OutputSimd128Register(), |
| 2197 src1 = i.InputSimd128Register(1); |
| 2198 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2199 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] |
| 2200 __ vmov(kScratchQuadReg, src1); |
| 2201 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] |
| 2202 break; |
| 2203 } |
| 2204 case kArmS32x4TransposeRight: { |
| 2205 Simd128Register dst = i.OutputSimd128Register(), |
| 2206 src1 = i.InputSimd128Register(1); |
| 2207 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2208 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). |
| 2209 __ vmov(kScratchQuadReg, src1); |
| 2210 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] |
| 2211 break; |
| 2212 } |
| 2213 case kArmS16x8ZipLeft: { |
| 2214 Simd128Register dst = i.OutputSimd128Register(), |
| 2215 src1 = i.InputSimd128Register(1); |
| 2216 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] |
| 2217 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2218 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11] |
| 2219 __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11] |
| 2220 break; |
| 2221 } |
| 2222 case kArmS16x8ZipRight: { |
| 2223 Simd128Register dst = i.OutputSimd128Register(), |
| 2224 src1 = i.InputSimd128Register(1); |
| 2225 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2226 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). |
| 2227 __ vmov(dst.low(), src1.high()); |
| 2228 __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15] |
| 2229 break; |
| 2230 } |
| 2231 case kArmS16x8UnzipLeft: { |
| 2232 Simd128Register dst = i.OutputSimd128Register(), |
| 2233 src1 = i.InputSimd128Register(1); |
| 2234 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2235 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] |
| 2236 __ vmov(kScratchQuadReg, src1); |
| 2237 __ vuzp(Neon16, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 14] |
| 2238 break; |
| 2239 } |
| 2240 case kArmS16x8UnzipRight: { |
| 2241 Simd128Register dst = i.OutputSimd128Register(), |
| 2242 src1 = i.InputSimd128Register(1); |
| 2243 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2244 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). |
| 2245 __ vmov(kScratchQuadReg, src1); |
| 2246 __ vuzp(Neon16, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 15] |
| 2247 break; |
| 2248 } |
| 2249 case kArmS16x8TransposeLeft: { |
| 2250 Simd128Register dst = i.OutputSimd128Register(), |
| 2251 src1 = i.InputSimd128Register(1); |
| 2252 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2253 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15] |
| 2254 __ vmov(kScratchQuadReg, src1); |
| 2255 __ vtrn(Neon16, dst, kScratchQuadReg); // dst = [0, 8, 2, 10, ... 14] |
| 2256 break; |
| 2257 } |
| 2258 case kArmS16x8TransposeRight: { |
| 2259 Simd128Register dst = i.OutputSimd128Register(), |
| 2260 src1 = i.InputSimd128Register(1); |
| 2261 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2262 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). |
| 2263 __ vmov(kScratchQuadReg, src1); |
| 2264 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] |
| 2265 break; |
| 2266 } |
| 2267 case kArmS8x16ZipLeft: { |
| 2268 Simd128Register dst = i.OutputSimd128Register(), |
| 2269 src1 = i.InputSimd128Register(1); |
| 2270 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2271 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] |
| 2272 __ vmov(dst.high(), src1.low()); |
| 2273 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] |
| 2274 break; |
| 2275 } |
| 2276 case kArmS8x16ZipRight: { |
| 2277 Simd128Register dst = i.OutputSimd128Register(), |
| 2278 src1 = i.InputSimd128Register(1); |
| 2279 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2280 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). |
| 2281 __ vmov(dst.low(), src1.high()); |
| 2282 __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31] |
| 2283 break; |
| 2284 } |
| 2285 case kArmS8x16UnzipLeft: { |
| 2286 Simd128Register dst = i.OutputSimd128Register(), |
| 2287 src1 = i.InputSimd128Register(1); |
| 2288 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2289 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] |
| 2290 __ vmov(kScratchQuadReg, src1); |
| 2291 __ vuzp(Neon8, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 30] |
| 2292 break; |
| 2293 } |
| 2294 case kArmS8x16UnzipRight: { |
| 2295 Simd128Register dst = i.OutputSimd128Register(), |
| 2296 src1 = i.InputSimd128Register(1); |
| 2297 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2298 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). |
| 2299 __ vmov(kScratchQuadReg, src1); |
| 2300 __ vuzp(Neon8, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 31] |
| 2301 break; |
| 2302 } |
| 2303 case kArmS8x16TransposeLeft: { |
| 2304 Simd128Register dst = i.OutputSimd128Register(), |
| 2305 src1 = i.InputSimd128Register(1); |
| 2306 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2307 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] |
| 2308 __ vmov(kScratchQuadReg, src1); |
| 2309 __ vtrn(Neon8, dst, kScratchQuadReg); // dst = [0, 16, 2, 18, ... 30] |
| 2310 break; |
| 2311 } |
| 2312 case kArmS8x16TransposeRight: { |
| 2313 Simd128Register dst = i.OutputSimd128Register(), |
| 2314 src1 = i.InputSimd128Register(1); |
| 2315 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2316 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). |
| 2317 __ vmov(kScratchQuadReg, src1); |
| 2318 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] |
| 2319 break; |
| 2320 } |
| 2321 case kArmS8x16Concat: { |
| 2322 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| 2323 i.InputSimd128Register(1), i.InputInt4(2)); |
| 2324 break; |
| 2325 } |
| 2326 case kArmS32x2Reverse: { |
| 2327 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2328 break; |
| 2329 } |
| 2330 case kArmS16x4Reverse: { |
| 2331 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2332 break; |
| 2333 } |
| 2334 case kArmS16x2Reverse: { |
| 2335 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2336 break; |
| 2337 } |
| 2338 case kArmS8x8Reverse: { |
| 2339 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2340 break; |
| 2341 } |
| 2342 case kArmS8x4Reverse: { |
| 2343 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2344 break; |
| 2345 } |
| 2346 case kArmS8x2Reverse: { |
| 2347 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2348 break; |
| 2349 } |
2159 case kArmS1x4AnyTrue: { | 2350 case kArmS1x4AnyTrue: { |
2160 const QwNeonRegister& src = i.InputSimd128Register(0); | 2351 const QwNeonRegister& src = i.InputSimd128Register(0); |
2161 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); | 2352 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); |
2162 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, | 2353 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, |
2163 kScratchDoubleReg); | 2354 kScratchDoubleReg); |
2164 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); | 2355 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); |
2165 break; | 2356 break; |
2166 } | 2357 } |
2167 case kArmS1x4AllTrue: { | 2358 case kArmS1x4AllTrue: { |
2168 const QwNeonRegister& src = i.InputSimd128Register(0); | 2359 const QwNeonRegister& src = i.InputSimd128Register(0); |
(...skipping 802 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2971 padding_size -= v8::internal::Assembler::kInstrSize; | 3162 padding_size -= v8::internal::Assembler::kInstrSize; |
2972 } | 3163 } |
2973 } | 3164 } |
2974 } | 3165 } |
2975 | 3166 |
2976 #undef __ | 3167 #undef __ |
2977 | 3168 |
2978 } // namespace compiler | 3169 } // namespace compiler |
2979 } // namespace internal | 3170 } // namespace internal |
2980 } // namespace v8 | 3171 } // namespace v8 |
OLD | NEW |