OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" |
6 | 6 |
7 #include "src/arm/macro-assembler-arm.h" | 7 #include "src/arm/macro-assembler-arm.h" |
8 #include "src/assembler-inl.h" | 8 #include "src/assembler-inl.h" |
9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" |
10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" |
(...skipping 329 matching lines...) |
340 return pl; | 340 return pl; |
341 case kNegative: | 341 case kNegative: |
342 return mi; | 342 return mi; |
343 default: | 343 default: |
344 break; | 344 break; |
345 } | 345 } |
346 UNREACHABLE(); | 346 UNREACHABLE(); |
347 return kNoCondition; | 347 return kNoCondition; |
348 } | 348 } |
349 | 349 |
| 350 int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) { |
| 351 // If unary shuffle, table is src0 (2 d-registers). |
| 352 if (src0.is(src1)) return 2; |
| 353 // Binary shuffle, table is src0, src1. They must be consecutive. |
| 354 DCHECK_EQ(src0.code() + 1, src1.code()); |
| 355 return 4; // 4 d-registers. |
| 356 } |
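Note: the size returned here feeds NeonListOperand in the vtbl-based shuffle cases below. A minimal usage sketch (index_lo/index_hi are hypothetical names for registers assumed to hold the byte-index masks):

    // Unary shuffle: table = {src0.low(), src0.high()}, size 2.
    // Binary shuffle: table = {src0.low() .. src1.high()}, size 4,
    // which is why src0 and src1 must be consecutive q-registers.
    NeonListOperand table(src0.low(), GetVtblTableSize(src0, src1));
    __ vtbl(dst.low(), table, index_lo);   // each dst byte = table[index byte]
    __ vtbl(dst.high(), table, index_hi);  // out-of-range indices produce 0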
| 357 |
350 } // namespace | 358 } // namespace |
351 | 359 |
352 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ | 360 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ |
353 do { \ | 361 do { \ |
354 auto result = i.Output##Type##Register(); \ | 362 auto result = i.Output##Type##Register(); \ |
355 auto offset = i.InputRegister(0); \ | 363 auto offset = i.InputRegister(0); \ |
356 if (instr->InputAt(1)->IsRegister()) { \ | 364 if (instr->InputAt(1)->IsRegister()) { \ |
357 __ cmp(offset, i.InputRegister(1)); \ | 365 __ cmp(offset, i.InputRegister(1)); \ |
358 } else { \ | 366 } else { \ |
359 __ cmp(offset, i.InputImmediate(1)); \ | 367 __ cmp(offset, i.InputImmediate(1)); \ |
(...skipping 1819 matching lines...) |
2179 } | 2187 } |
2180 case kArmS32x4TransposeLeft: { | 2188 case kArmS32x4TransposeLeft: { |
2181 Simd128Register dst = i.OutputSimd128Register(), | 2189 Simd128Register dst = i.OutputSimd128Register(), |
2182 src1 = i.InputSimd128Register(1); | 2190 src1 = i.InputSimd128Register(1); |
2183 DCHECK(dst.is(i.InputSimd128Register(0))); | 2191 DCHECK(dst.is(i.InputSimd128Register(0))); |
2184 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] | 2192 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] |
2185 __ vmov(kScratchQuadReg, src1); | 2193 __ vmov(kScratchQuadReg, src1); |
2186 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] | 2194 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] |
2187 break; | 2195 break; |
2188 } | 2196 } |
| 2197 case kArmS32x4Shuffle: { |
| 2198 Simd128Register dst = i.OutputSimd128Register(), |
| 2199 src0 = i.InputSimd128Register(0), |
| 2200 src1 = i.InputSimd128Register(1); |
| 2201 // Check for in-place shuffles. |
| 2202 // If dst == src0 == src1, then the shuffle is unary and we only use src0. |
| 2203 if (dst.is(src0)) { |
| 2204 __ vmov(kScratchQuadReg, src0); |
| 2205 src0 = kScratchQuadReg; |
| 2206 } else if (dst.is(src1)) { |
| 2207 __ vmov(kScratchQuadReg, src1); |
| 2208 src1 = kScratchQuadReg; |
| 2209 } |
| 2210 // Perform shuffle as a vmov per lane. |
| 2211 int dst_code = dst.code() * 4; |
| 2212 int src0_code = src0.code() * 4; |
| 2213 int src1_code = src1.code() * 4; |
| 2214 int32_t shuffle = i.InputInt32(2); |
| 2215 for (int i = 0; i < 4; i++) { |
| 2216 int lane = shuffle & 0x7; |
| 2217 int src_code = src0_code; |
| 2218 if (lane >= 4) { |
| 2219 src_code = src1_code; |
| 2220 lane &= 0x3; |
| 2221 } |
| 2222 __ VmovExtended(dst_code + i, src_code + lane, kScratchReg); |
| 2223 shuffle >>= 8; |
| 2224 } |
| 2225 break; |
| 2226 } |
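Note: the shuffle immediate packs four lane indices, one per byte, low byte first; indices 4-7 select from src1. A worked decoding of a hypothetical immediate:

    // shuffle = 0x07020500
    //   i=0: byte 0x00 -> src0 lane 0
    //   i=1: byte 0x05 -> src1 lane 5 & 0x3 = 1
    //   i=2: byte 0x02 -> src0 lane 2
    //   i=3: byte 0x07 -> src1 lane 7 & 0x3 = 3
    // dst = [src0[0], src1[1], src0[2], src1[3]]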
2189 case kArmS32x4TransposeRight: { | 2227 case kArmS32x4TransposeRight: { |
2190 Simd128Register dst = i.OutputSimd128Register(), | 2228 Simd128Register dst = i.OutputSimd128Register(), |
2191 src1 = i.InputSimd128Register(1); | 2229 src1 = i.InputSimd128Register(1); |
2192 DCHECK(dst.is(i.InputSimd128Register(0))); | 2230 DCHECK(dst.is(i.InputSimd128Register(0))); |
2193 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). | 2231 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). |
2194 __ vmov(kScratchQuadReg, src1); | 2232 __ vmov(kScratchQuadReg, src1); |
2195 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] | 2233 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] |
2196 break; | 2234 break; |
2197 } | 2235 } |
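Note: the transpose cases build on vtrn, which swaps the odd lanes of its first operand with the even lanes of its second. A scalar model of vtrn on two 4-lane vectors a and b (a sketch of the lane movement, not the NEON encoding):

    // After vtrn(Neon32, a, b): a = [a0, b0, a2, b2], b = [a1, b1, a3, b3]
    for (int j = 0; j < 4; j += 2) std::swap(a[j + 1], b[j]);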
2198 case kArmS16x8ZipLeft: { | 2236 case kArmS16x8ZipLeft: { |
(...skipping 43 matching lines...) |
2242 } | 2280 } |
2243 case kArmS16x8TransposeRight: { | 2281 case kArmS16x8TransposeRight: { |
2244 Simd128Register dst = i.OutputSimd128Register(), | 2282 Simd128Register dst = i.OutputSimd128Register(), |
2245 src1 = i.InputSimd128Register(1); | 2283 src1 = i.InputSimd128Register(1); |
2246 DCHECK(dst.is(i.InputSimd128Register(0))); | 2284 DCHECK(dst.is(i.InputSimd128Register(0))); |
2247 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). | 2285 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). |
2248 __ vmov(kScratchQuadReg, src1); | 2286 __ vmov(kScratchQuadReg, src1); |
2249 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] | 2287 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] |
2250 break; | 2288 break; |
2251 } | 2289 } |
| 2290 case kArmS16x8Shuffle: { |
| 2291 Simd128Register dst = i.OutputSimd128Register(), |
| 2292 src0 = i.InputSimd128Register(0), |
| 2293 src1 = i.InputSimd128Register(1); |
| 2294 DwVfpRegister table_base = src0.low(); |
| 2295 int table_size = GetVtblTableSize(src0, src1); |
| 2296 // Convert the shuffle lane masks to byte masks in kScratchQuadReg. |
| 2297 int scratch_s_base = kScratchQuadReg.code() * 4; |
| 2298 for (int j = 0; j < 2; j++) { |
| 2299 int32_t four_lanes = i.InputInt32(2 + j); |
| 2300 for (int k = 0; k < 2; k++) { |
| 2301 uint8_t w0 = (four_lanes & 0xF) * kShortSize; |
| 2302 four_lanes >>= 8; |
| 2303 uint8_t w1 = (four_lanes & 0xF) * kShortSize; |
| 2304 four_lanes >>= 8; |
| 2305 int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24); |
| 2306 // Ensure byte indices are in [0, 31] so masks are never NaNs. |
| 2307 four_lanes &= 0x1F1F1F1F; |
| 2308 __ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k), |
| 2309 bit_cast<float>(mask)); |
| 2310 } |
| 2311 } |
| 2312 NeonListOperand table(table_base, table_size); |
| 2313 if (!dst.is(src0) && !dst.is(src1)) { |
| 2314 __ vtbl(dst.low(), table, kScratchQuadReg.low()); |
| 2315 __ vtbl(dst.high(), table, kScratchQuadReg.high()); |
| 2316 } else { |
| 2317 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); |
| 2318 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); |
| 2319 __ vmov(dst, kScratchQuadReg); |
| 2320 } |
| 2321 break; |
| 2322 } |
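Note: each 16-bit lane index is scaled to a starting byte index (kShortSize == 2), then expanded to a consecutive byte pair in the mask word. A worked example for one hypothetical input word:

    // four_lanes = 0x00050004 encodes 16-bit lanes [4, 0, 5, 0]:
    //   k=0: w0 = 4 * 2 = 8,  w1 = 0 * 2 = 0 -> mask = 0x01000908 (bytes 8,9,0,1)
    //   k=1: w0 = 5 * 2 = 10, w1 = 0 * 2 = 0 -> mask = 0x01000B0A (bytes 10,11,0,1)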
2252 case kArmS8x16ZipLeft: { | 2323 case kArmS8x16ZipLeft: { |
2253 Simd128Register dst = i.OutputSimd128Register(), | 2324 Simd128Register dst = i.OutputSimd128Register(), |
2254 src1 = i.InputSimd128Register(1); | 2325 src1 = i.InputSimd128Register(1); |
2255 DCHECK(dst.is(i.InputSimd128Register(0))); | 2326 DCHECK(dst.is(i.InputSimd128Register(0))); |
2256 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] | 2327 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] |
2257 __ vmov(dst.high(), src1.low()); | 2328 __ vmov(dst.high(), src1.low()); |
2258 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] | 2329 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] |
2259 break; | 2330 break; |
2260 } | 2331 } |
2261 case kArmS8x16ZipRight: { | 2332 case kArmS8x16ZipRight: { |
(...skipping 39 matching lines...) |
2301 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). | 2372 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). |
2302 __ vmov(kScratchQuadReg, src1); | 2373 __ vmov(kScratchQuadReg, src1); |
2303 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] | 2374 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] |
2304 break; | 2375 break; |
2305 } | 2376 } |
2306 case kArmS8x16Concat: { | 2377 case kArmS8x16Concat: { |
2307 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), | 2378 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), |
2308 i.InputSimd128Register(1), i.InputInt4(2)); | 2379 i.InputSimd128Register(1), i.InputInt4(2)); |
2309 break; | 2380 break; |
2310 } | 2381 } |
| 2382 case kArmS8x16Shuffle: { |
| 2383 Simd128Register dst = i.OutputSimd128Register(), |
| 2384 src0 = i.InputSimd128Register(0), |
| 2385 src1 = i.InputSimd128Register(1); |
| 2386 DwVfpRegister table_base = src0.low(); |
| 2387 int table_size = GetVtblTableSize(src0, src1); |
| 2388 // The shuffle lane masks are already byte masks; materialize them in kScratchQuadReg. |
| 2389 int scratch_s_base = kScratchQuadReg.code() * 4; |
| 2390 for (int j = 0; j < 4; j++) { |
| 2391 int32_t four_lanes = i.InputInt32(2 + j); |
| 2392 // Ensure byte indices are in [0, 31] so masks are never NaNs. |
| 2393 four_lanes &= 0x1F1F1F1F; |
| 2394 __ vmov(SwVfpRegister::from_code(scratch_s_base + j), |
| 2395 bit_cast<float>(four_lanes)); |
| 2396 } |
| 2397 NeonListOperand table(table_base, table_size); |
| 2398 if (!dst.is(src0) && !dst.is(src1)) { |
| 2399 __ vtbl(dst.low(), table, kScratchQuadReg.low()); |
| 2400 __ vtbl(dst.high(), table, kScratchQuadReg.high()); |
| 2401 } else { |
| 2402 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); |
| 2403 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); |
| 2404 __ vmov(dst, kScratchQuadReg); |
| 2405 } |
| 2406 break; |
| 2407 } |
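Note: here the immediate words are already byte indices, so they go straight into the index register; the & 0x1F1F1F1F clamp keeps every byte in [0, 31] so that, as the in-code comment suggests, the word passed through bit_cast<float> never forms a NaN pattern that vmov of a float literal might not reproduce bit-exactly. A hypothetical input:

    // four_lanes = 0x9F110200 -> 0x1F110200 after the clamp,
    // i.e. source bytes [0, 2, 17, 31] for four consecutive dst bytes.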
2311 case kArmS32x2Reverse: { | 2408 case kArmS32x2Reverse: { |
2312 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2409 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
2313 break; | 2410 break; |
2314 } | 2411 } |
2315 case kArmS16x4Reverse: { | 2412 case kArmS16x4Reverse: { |
2316 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2413 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
2317 break; | 2414 break; |
2318 } | 2415 } |
2319 case kArmS16x2Reverse: { | 2416 case kArmS16x2Reverse: { |
2320 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2417 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
(...skipping 826 matching lines...) |
3147 padding_size -= v8::internal::Assembler::kInstrSize; | 3244 padding_size -= v8::internal::Assembler::kInstrSize; |
3148 } | 3245 } |
3149 } | 3246 } |
3150 } | 3247 } |
3151 | 3248 |
3152 #undef __ | 3249 #undef __ |
3153 | 3250 |
3154 } // namespace compiler | 3251 } // namespace compiler |
3155 } // namespace internal | 3252 } // namespace internal |
3156 } // namespace v8 | 3253 } // namespace v8 |