OLD | NEW |
---|---|
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" |
6 | 6 |
7 #include "src/arm/macro-assembler-arm.h" | 7 #include "src/arm/macro-assembler-arm.h" |
8 #include "src/assembler-inl.h" | 8 #include "src/assembler-inl.h" |
9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" |
10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" |
(...skipping 329 matching lines...) | |
340 return pl; | 340 return pl; |
341 case kNegative: | 341 case kNegative: |
342 return mi; | 342 return mi; |
343 default: | 343 default: |
344 break; | 344 break; |
345 } | 345 } |
346 UNREACHABLE(); | 346 UNREACHABLE(); |
347 return kNoCondition; | 347 return kNoCondition; |
348 } | 348 } |
349 | 349 |
350 int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) { | |
351 // If unary shuffle, table is src0 (2 d-registers). | |
352 if (src0.is(src1)) return 2; | |
353 // Binary shuffle, table is src0, src1. They must be consecutive registers. | |
354 DCHECK_EQ(src0.code() + 1, src1.code()); | |
355 return 4; // 4 d-registers. | |
356 } | |
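A minimal scalar model of how NEON vtbl consumes the table sized here may help when reviewing (VtblModel is a hypothetical helper, not part of this patch): the table is table_size consecutive d-registers, i.e. table_size * 8 bytes, and an index past the end yields zero.

#include <cstdint>
// Illustrative model only: vtbl over a table of 'table_size'
// d-registers (8 bytes each); out-of-range indices produce 0.
uint8_t VtblModel(const uint8_t* table, int table_size, uint8_t index) {
  return index < table_size * 8 ? table[index] : 0;
}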
357 | |
350 } // namespace | 358 } // namespace |
351 | 359 |
352 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ | 360 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ |
353 do { \ | 361 do { \ |
354 auto result = i.Output##Type##Register(); \ | 362 auto result = i.Output##Type##Register(); \ |
355 auto offset = i.InputRegister(0); \ | 363 auto offset = i.InputRegister(0); \ |
356 if (instr->InputAt(1)->IsRegister()) { \ | 364 if (instr->InputAt(1)->IsRegister()) { \ |
357 __ cmp(offset, i.InputRegister(1)); \ | 365 __ cmp(offset, i.InputRegister(1)); \ |
358 } else { \ | 366 } else { \ |
359 __ cmp(offset, i.InputImmediate(1)); \ | 367 __ cmp(offset, i.InputImmediate(1)); \ |
(...skipping 1819 matching lines...) | |
2179 } | 2187 } |
2180 case kArmS32x4TransposeLeft: { | 2188 case kArmS32x4TransposeLeft: { |
2181 Simd128Register dst = i.OutputSimd128Register(), | 2189 Simd128Register dst = i.OutputSimd128Register(), |
2182 src1 = i.InputSimd128Register(1); | 2190 src1 = i.InputSimd128Register(1); |
2183 DCHECK(dst.is(i.InputSimd128Register(0))); | 2191 DCHECK(dst.is(i.InputSimd128Register(0))); |
2184 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] | 2192 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] |
2185 __ vmov(kScratchQuadReg, src1); | 2193 __ vmov(kScratchQuadReg, src1); |
2186 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] | 2194 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] |
2187 break; | 2195 break; |
2188 } | 2196 } |
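As a cross-check on the lane comments in the transpose cases, a scalar sketch of vtrn(Neon32, a, b) on 4-lane q-registers (Vtrn32Model is illustrative, not part of the patch): it swaps a[1] with b[0] and a[3] with b[2], so a = [0, 1, 2, 3], b = [4, 5, 6, 7] becomes a = [0, 4, 2, 6], b = [1, 5, 3, 7].

#include <cstdint>
// Illustrative scalar model of vtrn(Neon32, a, b).
void Vtrn32Model(uint32_t a[4], uint32_t b[4]) {
  for (int k = 0; k < 4; k += 2) {
    uint32_t t = a[k + 1];
    a[k + 1] = b[k];
    b[k] = t;
  }
}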
2197 case kArmS32x4Shuffle: { | |
2198 Simd128Register dst = i.OutputSimd128Register(), | |
2199 src0 = i.InputSimd128Register(0), | |
2200 src1 = i.InputSimd128Register(1); | |
2201 // Check for in-place shuffles. | |
2202 // If dst == src0 == src1, then the shuffle is unary and we only use src0. | |
2203 if (dst.is(src0)) { | |
2204 __ vmov(kScratchQuadReg, src0); | |
2205 src0 = kScratchQuadReg; | |
2206 } else if (dst.is(src1)) { | |
2207 __ vmov(kScratchQuadReg, src1); | |
2208 src1 = kScratchQuadReg; | |
2209 } | |
2210 // Perform shuffle as a vmov per lane. | |
2211 int dst_code = dst.code() * 4; | |
2212 int src0_code = src0.code() * 4; | |
2213 int src1_code = src1.code() * 4; | |
2214 int32_t shuffle = i.InputInt32(2); | |
2215 for (int i = 0; i < 4; i++) { | |
2216 int lane = shuffle & 0x7; | |
2217 int src_code = src0_code; | |
2218 if (lane >= 4) { | |
2219 src_code = src1_code; | |
2220 lane &= 0x3; | |
2221 } | |
2222 __ VmovExtended(dst_code + i, src_code + lane, kScratchReg); | |
martyn.capewell 2017/05/05 14:28:49: This will become expensive when each s-register mo…
bbudge 2017/05/05 20:36:28: Yes, I think I have a TODO to improve VMovExtended…
| |
2223 shuffle >>= 8; | |
2224 } | |
2225 break; | |
2226 } | |
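The lane arithmetic in the loop above is equivalent to this scalar sketch (S32x4ShuffleModel is hypothetical, not in the patch); it assumes dst does not alias the live sources, which the scratch-register copies and the unary-shuffle convention noted above arrange:

#include <cstdint>
// Each byte of 'shuffle' selects one of eight 32-bit source lanes:
// 0-3 from src0, 4-7 from src1 (only the low 3 bits of each byte are used).
void S32x4ShuffleModel(uint32_t dst[4], const uint32_t src0[4],
                       const uint32_t src1[4], int32_t shuffle) {
  for (int k = 0; k < 4; k++) {
    int lane = shuffle & 0x7;
    dst[k] = lane < 4 ? src0[lane] : src1[lane & 0x3];
    shuffle >>= 8;
  }
}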
2189 case kArmS32x4TransposeRight: { | 2227 case kArmS32x4TransposeRight: { |
2190 Simd128Register dst = i.OutputSimd128Register(), | 2228 Simd128Register dst = i.OutputSimd128Register(), |
2191 src1 = i.InputSimd128Register(1); | 2229 src1 = i.InputSimd128Register(1); |
2192 DCHECK(dst.is(i.InputSimd128Register(0))); | 2230 DCHECK(dst.is(i.InputSimd128Register(0))); |
2193 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). | 2231 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). |
2194 __ vmov(kScratchQuadReg, src1); | 2232 __ vmov(kScratchQuadReg, src1); |
2195 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] | 2233 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] |
2196 break; | 2234 break; |
2197 } | 2235 } |
2198 case kArmS16x8ZipLeft: { | 2236 case kArmS16x8ZipLeft: { |
(...skipping 43 matching lines...) | |
2242 } | 2280 } |
2243 case kArmS16x8TransposeRight: { | 2281 case kArmS16x8TransposeRight: { |
2244 Simd128Register dst = i.OutputSimd128Register(), | 2282 Simd128Register dst = i.OutputSimd128Register(), |
2245 src1 = i.InputSimd128Register(1); | 2283 src1 = i.InputSimd128Register(1); |
2246 DCHECK(dst.is(i.InputSimd128Register(0))); | 2284 DCHECK(dst.is(i.InputSimd128Register(0))); |
2247 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). | 2285 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). |
2248 __ vmov(kScratchQuadReg, src1); | 2286 __ vmov(kScratchQuadReg, src1); |
2249 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] | 2287 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] |
2250 break; | 2288 break; |
2251 } | 2289 } |
2290 case kArmS16x8Shuffle: { | |
2291 Simd128Register dst = i.OutputSimd128Register(), | |
2292 src0 = i.InputSimd128Register(0), | |
2293 src1 = i.InputSimd128Register(1); | |
2294 DwVfpRegister table_base = src0.low(); | |
2295 int table_size = GetVtblTableSize(src0, src1); | |
2296 // Convert the shuffle lane masks to byte masks in kScratchQuadReg. | |
2297 int scratch_s_base = kScratchQuadReg.code() * 4; | |
2298 for (int j = 0; j < 2; j++) { | |
2299 int32_t four_lanes = i.InputInt32(2 + j); | |
2300 for (int k = 0; k < 2; k++) { | |
2301 uint8_t w0 = (four_lanes & 0xFF) * kShortSize; | |
2302 four_lanes >>= 8; | |
2303 uint8_t w1 = (four_lanes & 0xFF) * kShortSize; | |
2304 four_lanes >>= 8; | |
2305 int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24); | |
2306 __ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k), | |
2307 bit_cast<float>(mask)); | |
martyn.capewell 2017/05/05 14:28:49: This may cause a problem - if your mask looks like…
bbudge 2017/05/05 20:36:28: I could also finesse the code so NaNs can't be gen…
martyn.capewell 2017/05/08 13:33:53: That will fix the NaN problem. However, the assemb…
| |
2308 } | |
2309 } | |
2310 NeonListOperand table(table_base, table_size); | |
2311 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); | |
2312 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); | |
2313 __ vmov(dst, kScratchQuadReg); | |
martyn.capewell 2017/05/05 14:28:49: If you know dst doesn't alias src0 or src1, vtbl c…
bbudge 2017/05/05 20:36:28: Nice, done.
| |
2314 break; | |
2315 } | |
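The mask construction above expands 16-bit lane indices into vtbl byte indices; a scalar sketch of one 32-bit mask word (HalfwordLanesToByteMask is hypothetical, not in the patch): lane index n becomes the byte pair (2n, 2n + 1), packed little-endian.

#include <cstdint>
// Expands two halfword lane indices into the four vtbl byte indices
// of one 32-bit mask word (kShortSize == 2 bytes per lane).
uint32_t HalfwordLanesToByteMask(uint32_t lane0, uint32_t lane1) {
  uint32_t w0 = lane0 * 2, w1 = lane1 * 2;
  return w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
}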
2252 case kArmS8x16ZipLeft: { | 2316 case kArmS8x16ZipLeft: { |
2253 Simd128Register dst = i.OutputSimd128Register(), | 2317 Simd128Register dst = i.OutputSimd128Register(), |
2254 src1 = i.InputSimd128Register(1); | 2318 src1 = i.InputSimd128Register(1); |
2255 DCHECK(dst.is(i.InputSimd128Register(0))); | 2319 DCHECK(dst.is(i.InputSimd128Register(0))); |
2256 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] | 2320 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] |
2257 __ vmov(dst.high(), src1.low()); | 2321 __ vmov(dst.high(), src1.low()); |
2258 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] | 2322 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] |
2259 break; | 2323 break; |
2260 } | 2324 } |
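For the zip cases, a scalar sketch of vzip(Neon8, a, b) on 8-byte d-registers (Vzip8Model is illustrative, not part of the patch): the inputs are interleaved, with the low half of the result landing in a and the high half in b.

#include <cstdint>
// Illustrative scalar model of vzip(Neon8, a, b).
void Vzip8Model(uint8_t a[8], uint8_t b[8]) {
  uint8_t t[16];
  for (int k = 0; k < 8; k++) {
    t[2 * k] = a[k];
    t[2 * k + 1] = b[k];
  }
  for (int k = 0; k < 8; k++) {
    a[k] = t[k];
    b[k] = t[k + 8];
  }
}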
2261 case kArmS8x16ZipRight: { | 2325 case kArmS8x16ZipRight: { |
(...skipping 39 matching lines...) | |
2301 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). | 2365 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). |
2302 __ vmov(kScratchQuadReg, src1); | 2366 __ vmov(kScratchQuadReg, src1); |
2303 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] | 2367 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] |
2304 break; | 2368 break; |
2305 } | 2369 } |
2306 case kArmS8x16Concat: { | 2370 case kArmS8x16Concat: { |
2307 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), | 2371 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), |
2308 i.InputSimd128Register(1), i.InputInt4(2)); | 2372 i.InputSimd128Register(1), i.InputInt4(2)); |
2309 break; | 2373 break; |
2310 } | 2374 } |
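Concat lowers to a single vext; as a scalar sketch (VextModel is illustrative, not part of the patch), the result is the 16-byte window starting at byte n of the 32-byte concatenation src0:src1.

#include <cstdint>
// Illustrative scalar model of vext(dst, src0, src1, n), 0 <= n < 16.
void VextModel(uint8_t dst[16], const uint8_t src0[16],
               const uint8_t src1[16], int n) {
  for (int k = 0; k < 16; k++) {
    int j = k + n;
    dst[k] = j < 16 ? src0[j] : src1[j - 16];
  }
}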
2375 case kArmS8x16Shuffle: { | |
2376 Simd128Register dst = i.OutputSimd128Register(), | |
2377 src0 = i.InputSimd128Register(0), | |
2378 src1 = i.InputSimd128Register(1); | |
2379 DwVfpRegister table_base = src0.low(); | |
2380 int table_size = GetVtblTableSize(src0, src1); | |
2381 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg. | |
2382 int scratch_s_base = kScratchQuadReg.code() * 4; | |
2383 for (int j = 0; j < 4; j++) { | |
2384 int32_t four_lanes = i.InputInt32(2 + j); | |
2385 __ vmov(SwVfpRegister::from_code(scratch_s_base + j), | |
2386 bit_cast<float>(four_lanes)); | |
2387 } | |
2388 NeonListOperand table(table_base, table_size); | |
2389 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); | |
2390 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); | |
2391 __ vmov(dst, kScratchQuadReg); | |
2392 break; | |
2393 } | |
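End to end, the byte shuffle above behaves like this scalar sketch for the binary case, table_size == 4 (S8x16ShuffleModel is hypothetical, not in the patch): index k selects byte k of src0 for k < 16 and byte k - 16 of src1 for 16 <= k < 32, with out-of-range indices yielding 0, as vtbl does.

#include <cstdint>
// Illustrative model of the vtbl-based byte shuffle (binary case).
void S8x16ShuffleModel(uint8_t dst[16], const uint8_t src0[16],
                       const uint8_t src1[16], const uint8_t idx[16]) {
  for (int k = 0; k < 16; k++) {
    uint8_t lane = idx[k];
    dst[k] = lane < 16 ? src0[lane] : (lane < 32 ? src1[lane - 16] : 0);
  }
}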
2311 case kArmS32x2Reverse: { | 2394 case kArmS32x2Reverse: { |
2312 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2395 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
2313 break; | 2396 break; |
2314 } | 2397 } |
2315 case kArmS16x4Reverse: { | 2398 case kArmS16x4Reverse: { |
2316 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2399 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
2317 break; | 2400 break; |
2318 } | 2401 } |
2319 case kArmS16x2Reverse: { | 2402 case kArmS16x2Reverse: { |
2320 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2403 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
(...skipping 826 matching lines...) | |
3147 padding_size -= v8::internal::Assembler::kInstrSize; | 3230 padding_size -= v8::internal::Assembler::kInstrSize; |
3148 } | 3231 } |
3149 } | 3232 } |
3150 } | 3233 } |
3151 | 3234 |
3152 #undef __ | 3235 #undef __ |
3153 | 3236 |
3154 } // namespace compiler | 3237 } // namespace compiler |
3155 } // namespace internal | 3238 } // namespace internal |
3156 } // namespace v8 | 3239 } // namespace v8 |