OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" |
6 | 6 |
7 #include "src/arm/macro-assembler-arm.h" | 7 #include "src/arm/macro-assembler-arm.h" |
8 #include "src/assembler-inl.h" | 8 #include "src/assembler-inl.h" |
9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" |
10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" |
(...skipping 322 matching lines...)
333 case kPositiveOrZero: | 333 case kPositiveOrZero: |
334 return pl; | 334 return pl; |
335 case kNegative: | 335 case kNegative: |
336 return mi; | 336 return mi; |
337 default: | 337 default: |
338 break; | 338 break; |
339 } | 339 } |
340 UNREACHABLE(); | 340 UNREACHABLE(); |
341 } | 341 } |
342 | 342 |
343 int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) { | |
344 // If unary shuffle, table is src0 (2 d-registers). | |
345 if (src0.is(src1)) return 2; | |
346 // Binary shuffle, table is src0, src1. They must be consecutive. | 
347 DCHECK_EQ(src0.code() + 1, src1.code()); | |
348 return 4; // 4 d-registers. | |
349 } | |
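The helper above relies on how NEON aliases its register banks: each Q register overlays two consecutive D registers, so two Q registers with consecutive codes form one contiguous run of four D registers that vtbl can address as a single table. A minimal standalone sketch of that reasoning, in plain C++ with hypothetical names (FirstDRegOfQReg, VtblTableSizeInDRegs), not V8 API:

#include <cassert>

// On ARM NEON, q<n> aliases d<2n> and d<2n+1>; e.g. q4 covers d8 and d9.
int FirstDRegOfQReg(int q_code) { return 2 * q_code; }

// Mirrors GetVtblTableSize: a unary shuffle reads one Q register
// (2 D registers); a binary shuffle reads two consecutive Q registers
// (4 D registers), which vtbl requires to be one contiguous list.
int VtblTableSizeInDRegs(int src0_q, int src1_q) {
  if (src0_q == src1_q) return 2;
  assert(src0_q + 1 == src1_q);  // e.g. q4/q5 -> d8..d11
  return 4;
}

For q4 and q5, the table starts at FirstDRegOfQReg(4) == 8 (d8) and spans d8..d11.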
350 | |
351 } // namespace | 343 } // namespace |
352 | 344 |
353 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ | 345 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ |
354 do { \ | 346 do { \ |
355 auto result = i.Output##Type##Register(); \ | 347 auto result = i.Output##Type##Register(); \ |
356 auto offset = i.InputRegister(0); \ | 348 auto offset = i.InputRegister(0); \ |
357 if (instr->InputAt(1)->IsRegister()) { \ | 349 if (instr->InputAt(1)->IsRegister()) { \ |
358 __ cmp(offset, i.InputRegister(1)); \ | 350 __ cmp(offset, i.InputRegister(1)); \ |
359 } else { \ | 351 } else { \ |
360 __ cmp(offset, i.InputImmediate(1)); \ | 352 __ cmp(offset, i.InputImmediate(1)); \ |
(...skipping 1923 matching lines...)
2284 } | 2276 } |
2285 case kArmS16x8TransposeRight: { | 2277 case kArmS16x8TransposeRight: { |
2286 Simd128Register dst = i.OutputSimd128Register(), | 2278 Simd128Register dst = i.OutputSimd128Register(), |
2287 src1 = i.InputSimd128Register(1); | 2279 src1 = i.InputSimd128Register(1); |
2288 DCHECK(dst.is(i.InputSimd128Register(0))); | 2280 DCHECK(dst.is(i.InputSimd128Register(0))); |
2289 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). | 2281 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). |
2290 __ vmov(kScratchQuadReg, src1); | 2282 __ vmov(kScratchQuadReg, src1); |
2291 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] | 2283 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] |
2292 break; | 2284 break; |
2293 } | 2285 } |
2294 case kArmS16x8Shuffle: { | |
2295 Simd128Register dst = i.OutputSimd128Register(), | |
2296 src0 = i.InputSimd128Register(0), | |
2297 src1 = i.InputSimd128Register(1); | |
2298 DwVfpRegister table_base = src0.low(); | |
2299 int table_size = GetVtblTableSize(src0, src1); | |
2300 // Convert the shuffle lane masks to byte masks in kScratchQuadReg. | |
2301 int scratch_s_base = kScratchQuadReg.code() * 4; | |
2302 for (int j = 0; j < 2; j++) { | |
2303 int32_t four_lanes = i.InputInt32(2 + j); | |
2304 for (int k = 0; k < 2; k++) { | |
2305 uint8_t w0 = (four_lanes & 0xF) * kShortSize; | |
2306 four_lanes >>= 8; | |
2307 uint8_t w1 = (four_lanes & 0xF) * kShortSize; | |
2308 four_lanes >>= 8; | |
2309 int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24); | |
2310 // Ensure byte indices are in [0, 31] so masks are never NaNs. | |
2311 mask &= 0x1F1F1F1F; | 
2312 __ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k), | |
2313 bit_cast<float>(mask)); | |
2314 } | |
2315 } | |
2316 NeonListOperand table(table_base, table_size); | |
2317 if (!dst.is(src0) && !dst.is(src1)) { | |
2318 __ vtbl(dst.low(), table, kScratchQuadReg.low()); | |
2319 __ vtbl(dst.high(), table, kScratchQuadReg.high()); | |
2320 } else { | |
2321 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); | |
2322 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); | |
2323 __ vmov(dst, kScratchQuadReg); | |
2324 } | |
2325 break; | |
2326 } | |
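The case removed above converts 16-bit shuffle lane indices into the byte indices that vtbl consumes: lane L becomes the byte pair (2L, 2L+1), and four such bytes are packed little-endian into one S register of kScratchQuadReg. A minimal sketch of that arithmetic in plain C++, assuming kShortSize == 2 (ExpandShortLanesToByteMask is a hypothetical name):

#include <cstdint>

// Expand two 16-bit lane indices into a packed 4-byte vtbl mask.
uint32_t ExpandShortLanesToByteMask(uint8_t lane0, uint8_t lane1) {
  uint8_t w0 = (lane0 & 0xF) * 2;  // kShortSize: a 16-bit lane is 2 bytes
  uint8_t w1 = (lane1 & 0xF) * 2;
  uint32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
  return mask & 0x1F1F1F1F;  // clamp byte indices to [0, 31]
}

For example, lanes {1, 9} expand to bytes {2, 3, 18, 19}, packed as 0x13120302. The tail of the case also guards against dst aliasing the table: when it does, vtbl writes into kScratchQuadReg first and the result is moved to dst afterwards, since vtbl must still be able to read the whole table while producing output bytes.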
2327 case kArmS8x16ZipLeft: { | 2286 case kArmS8x16ZipLeft: { |
2328 Simd128Register dst = i.OutputSimd128Register(), | 2287 Simd128Register dst = i.OutputSimd128Register(), |
2329 src1 = i.InputSimd128Register(1); | 2288 src1 = i.InputSimd128Register(1); |
2330 DCHECK(dst.is(i.InputSimd128Register(0))); | 2289 DCHECK(dst.is(i.InputSimd128Register(0))); |
2331 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] | 2290 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] |
2332 __ vmov(dst.high(), src1.low()); | 2291 __ vmov(dst.high(), src1.low()); |
2333 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] | 2292 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] |
2334 break; | 2293 break; |
2335 } | 2294 } |
2336 case kArmS8x16ZipRight: { | 2295 case kArmS8x16ZipRight: { |
(...skipping 44 matching lines...)
2381 case kArmS8x16Concat: { | 2340 case kArmS8x16Concat: { |
2382 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), | 2341 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), |
2383 i.InputSimd128Register(1), i.InputInt4(2)); | 2342 i.InputSimd128Register(1), i.InputInt4(2)); |
2384 break; | 2343 break; |
2385 } | 2344 } |
2386 case kArmS8x16Shuffle: { | 2345 case kArmS8x16Shuffle: { |
2387 Simd128Register dst = i.OutputSimd128Register(), | 2346 Simd128Register dst = i.OutputSimd128Register(), |
2388 src0 = i.InputSimd128Register(0), | 2347 src0 = i.InputSimd128Register(0), |
2389 src1 = i.InputSimd128Register(1); | 2348 src1 = i.InputSimd128Register(1); |
2390 DwVfpRegister table_base = src0.low(); | 2349 DwVfpRegister table_base = src0.low(); |
2391 int table_size = GetVtblTableSize(src0, src1); | 2350 // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and |
| 2351 // src1. They must be consecutive. |
| 2352 int table_size = src0.is(src1) ? 2 : 4; |
| 2353 DCHECK_IMPLIES(!src0.is(src1), src0.code() + 1 == src1.code()); |
2392 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg. | 2354 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg. |
2393 int scratch_s_base = kScratchQuadReg.code() * 4; | 2355 int scratch_s_base = kScratchQuadReg.code() * 4; |
2394 for (int j = 0; j < 4; j++) { | 2356 for (int j = 0; j < 4; j++) { |
2395 int32_t four_lanes = i.InputInt32(2 + j); | 2357 int32_t four_lanes = i.InputInt32(2 + j); |
2396 // Ensure byte indices are in [0, 31] so masks are never NaNs. | 2358 // Ensure byte indices are in [0, 31] so masks are never NaNs. |
2397 four_lanes &= 0x1F1F1F1F; | 2359 four_lanes &= 0x1F1F1F1F; |
2398 __ vmov(SwVfpRegister::from_code(scratch_s_base + j), | 2360 __ vmov(SwVfpRegister::from_code(scratch_s_base + j), |
2399 bit_cast<float>(four_lanes)); | 2361 bit_cast<float>(four_lanes)); |
2400 } | 2362 } |
2401 NeonListOperand table(table_base, table_size); | 2363 NeonListOperand table(table_base, table_size); |
(...skipping 844 matching lines...)
3246 padding_size -= v8::internal::Assembler::kInstrSize; | 3208 padding_size -= v8::internal::Assembler::kInstrSize; |
3247 } | 3209 } |
3248 } | 3210 } |
3249 } | 3211 } |
3250 | 3212 |
3251 #undef __ | 3213 #undef __ |
3252 | 3214 |
3253 } // namespace compiler | 3215 } // namespace compiler |
3254 } // namespace internal | 3216 } // namespace internal |
3255 } // namespace v8 | 3217 } // namespace v8 |