Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/compiler/code-generator.h" | 5 #include "src/compiler/code-generator.h" |
| 6 | 6 |
| 7 #include "src/arm/macro-assembler-arm.h" | 7 #include "src/arm/macro-assembler-arm.h" |
| 8 #include "src/assembler-inl.h" | 8 #include "src/assembler-inl.h" |
| 9 #include "src/compilation-info.h" | 9 #include "src/compilation-info.h" |
| 10 #include "src/compiler/code-generator-impl.h" | 10 #include "src/compiler/code-generator-impl.h" |
| (...skipping 329 matching lines...) | |
| 340 return pl; | 340 return pl; |
| 341 case kNegative: | 341 case kNegative: |
| 342 return mi; | 342 return mi; |
| 343 default: | 343 default: |
| 344 break; | 344 break; |
| 345 } | 345 } |
| 346 UNREACHABLE(); | 346 UNREACHABLE(); |
| 347 return kNoCondition; | 347 return kNoCondition; |
| 348 } | 348 } |
| 349 | 349 |
| 350 int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) { | |
| 351 // If unary shuffle, table is src0 (2 d-registers). | |
| 352 if (src0.is(src1)) return 2; | |
| 353 // Binary shuffle, table is src0, src1. They must be consecutive. | |
| 354 DCHECK_EQ(src0.code() + 1, src1.code()); | |
| 355 return 4; // 4 d-registers. | |
| 356 } | |
| 357 | |
| 350 } // namespace | 358 } // namespace |
| 351 | 359 |
| 352 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ | 360 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ |
| 353 do { \ | 361 do { \ |
| 354 auto result = i.Output##Type##Register(); \ | 362 auto result = i.Output##Type##Register(); \ |
| 355 auto offset = i.InputRegister(0); \ | 363 auto offset = i.InputRegister(0); \ |
| 356 if (instr->InputAt(1)->IsRegister()) { \ | 364 if (instr->InputAt(1)->IsRegister()) { \ |
| 357 __ cmp(offset, i.InputRegister(1)); \ | 365 __ cmp(offset, i.InputRegister(1)); \ |
| 358 } else { \ | 366 } else { \ |
| 359 __ cmp(offset, i.InputImmediate(1)); \ | 367 __ cmp(offset, i.InputImmediate(1)); \ |
| (...skipping 1819 matching lines...) | |
| 2179 } | 2187 } |
| 2180 case kArmS32x4TransposeLeft: { | 2188 case kArmS32x4TransposeLeft: { |
| 2181 Simd128Register dst = i.OutputSimd128Register(), | 2189 Simd128Register dst = i.OutputSimd128Register(), |
| 2182 src1 = i.InputSimd128Register(1); | 2190 src1 = i.InputSimd128Register(1); |
| 2183 DCHECK(dst.is(i.InputSimd128Register(0))); | 2191 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2184 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] | 2192 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] |
| 2185 __ vmov(kScratchQuadReg, src1); | 2193 __ vmov(kScratchQuadReg, src1); |
| 2186 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] | 2194 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] |
| 2187 break; | 2195 break; |
| 2188 } | 2196 } |
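
Background on vtrn for the transpose cases, not part of the CL: vtrn.32 swaps lane 1 of its first operand with lane 0 of its second, and lane 3 with lane 2, so with dst = [0, 1, 2, 3] and kScratchQuadReg = [4, 5, 6, 7] the instruction leaves dst = [0, 4, 2, 6], matching the comment above. A scalar sketch (hypothetical name):

```cpp
#include <algorithm>
#include <cstdint>

// Scalar model of vtrn.32 on two 4-lane vectors: afterwards
// a = [a0, b0, a2, b2] and b = [a1, b1, a3, b3].
void vtrn32_model(uint32_t a[4], uint32_t b[4]) {
  std::swap(a[1], b[0]);
  std::swap(a[3], b[2]);
}
```
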
| 2197 case kArmS32x4Shuffle: { | |
| 2198 Simd128Register dst = i.OutputSimd128Register(), | |
| 2199 src0 = i.InputSimd128Register(0), | |
| 2200 src1 = i.InputSimd128Register(1); | |
| 2201 // Check for in-place shuffles. | |
| 2202 // If dst == src0 == src1, then the shuffle is unary and we only use src0. | |
| 2203 if (dst.is(src0)) { | |
| 2204 __ vmov(kScratchQuadReg, src0); | |
| 2205 src0 = kScratchQuadReg; | |
| 2206 } else if (dst.is(src1)) { | |
| 2207 __ vmov(kScratchQuadReg, src1); | |
| 2208 src1 = kScratchQuadReg; | |
| 2209 } | |
| 2210 // Perform shuffle as a vmov per lane. | |
| 2211 int dst_code = dst.code() * 4; | |
| 2212 int src0_code = src0.code() * 4; | |
| 2213 int src1_code = src1.code() * 4; | |
| 2214 int32_t shuffle = i.InputInt32(2); | |
| 2215 for (int i = 0; i < 4; i++) { | |
| 2216 int lane = shuffle & 0x7; | |
| 2217 int src_code = src0_code; | |
| 2218 if (lane >= 4) { | |
| 2219 src_code = src1_code; | |
| 2220 lane &= 0x3; | |
| 2221 } | |
| 2222 __ VmovExtended(dst_code + i, src_code + lane, kScratchReg); | |

martyn.capewell 2017/05/05 14:28:49: This will become expensive when each s-register mo…

bbudge 2017/05/05 20:36:28: Yes, I think I have a TODO to improve VMovExtended…

| 2223 shuffle >>= 8; | |
| 2224 } | |
| 2225 break; | |
| 2226 } | |
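
The packed immediate in kArmS32x4Shuffle encodes one source lane index per byte; indices 4-7 select from src1. The standalone sketch below (hypothetical names, not V8 API) performs the same decode as the loop above:

```cpp
#include <cstdint>

// Decode of the kArmS32x4Shuffle immediate: byte n of `shuffle` holds
// the source lane for destination lane n; lanes 4-7 come from src1.
void s32x4_shuffle_model(uint32_t dst[4], const uint32_t src0[4],
                         const uint32_t src1[4], int32_t shuffle) {
  for (int n = 0; n < 4; n++) {
    int lane = shuffle & 0x7;
    dst[n] = (lane >= 4) ? src1[lane & 0x3] : src0[lane];
    shuffle >>= 8;
  }
}
```

For example, shuffle = 0x06040200 yields dst = [src0[0], src0[2], src1[0], src1[2]].
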
| 2189 case kArmS32x4TransposeRight: { | 2227 case kArmS32x4TransposeRight: { |
| 2190 Simd128Register dst = i.OutputSimd128Register(), | 2228 Simd128Register dst = i.OutputSimd128Register(), |
| 2191 src1 = i.InputSimd128Register(1); | 2229 src1 = i.InputSimd128Register(1); |
| 2192 DCHECK(dst.is(i.InputSimd128Register(0))); | 2230 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2193 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). | 2231 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). |
| 2194 __ vmov(kScratchQuadReg, src1); | 2232 __ vmov(kScratchQuadReg, src1); |
| 2195 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] | 2233 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] |
| 2196 break; | 2234 break; |
| 2197 } | 2235 } |
| 2198 case kArmS16x8ZipLeft: { | 2236 case kArmS16x8ZipLeft: { |
| (...skipping 43 matching lines...) | |
| 2242 } | 2280 } |
| 2243 case kArmS16x8TransposeRight: { | 2281 case kArmS16x8TransposeRight: { |
| 2244 Simd128Register dst = i.OutputSimd128Register(), | 2282 Simd128Register dst = i.OutputSimd128Register(), |
| 2245 src1 = i.InputSimd128Register(1); | 2283 src1 = i.InputSimd128Register(1); |
| 2246 DCHECK(dst.is(i.InputSimd128Register(0))); | 2284 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2247 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). | 2285 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). |
| 2248 __ vmov(kScratchQuadReg, src1); | 2286 __ vmov(kScratchQuadReg, src1); |
| 2249 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] | 2287 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] |
| 2250 break; | 2288 break; |
| 2251 } | 2289 } |
| 2290 case kArmS16x8Shuffle: { | |
| 2291 Simd128Register dst = i.OutputSimd128Register(), | |
| 2292 src0 = i.InputSimd128Register(0), | |
| 2293 src1 = i.InputSimd128Register(1); | |
| 2294 DwVfpRegister table_base = src0.low(); | |
| 2295 int table_size = GetVtblTableSize(src0, src1); | |
| 2296 // Convert the shuffle lane masks to byte masks in kScratchQuadReg. | |
| 2297 int scratch_s_base = kScratchQuadReg.code() * 4; | |
| 2298 for (int j = 0; j < 2; j++) { | |
| 2299 int32_t four_lanes = i.InputInt32(2 + j); | |
| 2300 for (int k = 0; k < 2; k++) { | |
| 2301 uint8_t w0 = (four_lanes & 0xFF) * kShortSize; | |
| 2302 four_lanes >>= 8; | |
| 2303 uint8_t w1 = (four_lanes & 0xFF) * kShortSize; | |
| 2304 four_lanes >>= 8; | |
| 2305 int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24); | |
| 2306 __ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k), | |
| 2307 bit_cast<float>(mask)); | |

martyn.capewell 2017/05/05 14:28:49: This may cause a problem - if your mask looks like…

bbudge 2017/05/05 20:36:28: I could also finesse the code so NaNs can't be gen…

martyn.capewell 2017/05/08 13:33:53: That will fix the NaN problem. However, the assemb…

| 2309 } | |
| 2310 NeonListOperand table(table_base, table_size); | |
| 2311 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); | |
| 2312 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); | |
| 2313 __ vmov(dst, kScratchQuadReg); | |

martyn.capewell 2017/05/05 14:28:49: If you know dst doesn't alias src0 or src1, vtbl c…

bbudge 2017/05/05 20:36:28: Nice, done.

| 2314 break; | |
| 2315 } | |
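
The inner loop above turns 16-bit lane indices into vtbl byte indices: lane l of a 16-bit shuffle reads table bytes 2l and 2l+1 (kShortSize == 2). A standalone sketch of the expansion applied to the low two lane indices of a packed word (hypothetical name):

```cpp
#include <cstdint>

// Expand two 16-bit lane indices (low two bytes of `four_lanes`) into
// the four vtbl byte indices (w*2, w*2 + 1) packed into one 32-bit word.
uint32_t ExpandTwoLanes(uint32_t four_lanes) {
  uint8_t w0 = (four_lanes & 0xFF) * 2;
  uint8_t w1 = ((four_lanes >> 8) & 0xFF) * 2;
  return w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
}
```

For example, ExpandTwoLanes(0x0100), lanes 0 and 1, produces 0x03020100, i.e. table bytes 0, 1, 2, 3.
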
| 2252 case kArmS8x16ZipLeft: { | 2316 case kArmS8x16ZipLeft: { |
| 2253 Simd128Register dst = i.OutputSimd128Register(), | 2317 Simd128Register dst = i.OutputSimd128Register(), |
| 2254 src1 = i.InputSimd128Register(1); | 2318 src1 = i.InputSimd128Register(1); |
| 2255 DCHECK(dst.is(i.InputSimd128Register(0))); | 2319 DCHECK(dst.is(i.InputSimd128Register(0))); |
| 2256 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] | 2320 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] |
| 2257 __ vmov(dst.high(), src1.low()); | 2321 __ vmov(dst.high(), src1.low()); |
| 2258 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] | 2322 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] |
| 2259 break; | 2323 break; |
| 2260 } | 2324 } |
| 2261 case kArmS8x16ZipRight: { | 2325 case kArmS8x16ZipRight: { |
| (...skipping 39 matching lines...) | |
| 2301 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). | 2365 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). |
| 2302 __ vmov(kScratchQuadReg, src1); | 2366 __ vmov(kScratchQuadReg, src1); |
| 2303 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] | 2367 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] |
| 2304 break; | 2368 break; |
| 2305 } | 2369 } |
| 2306 case kArmS8x16Concat: { | 2370 case kArmS8x16Concat: { |
| 2307 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), | 2371 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), |
| 2308 i.InputSimd128Register(1), i.InputInt4(2)); | 2372 i.InputSimd128Register(1), i.InputInt4(2)); |
| 2309 break; | 2373 break; |
| 2310 } | 2374 } |
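
Background on vext for the concat case, not part of the CL: vext.8 extracts 16 consecutive bytes starting at the immediate offset from the 32-byte concatenation of the two sources. A scalar sketch (hypothetical name):

```cpp
#include <cstdint>

// Scalar model of vext.8 with offset imm in [0, 16):
// dst takes bytes imm..imm+15 of the 32-byte value src0:src1.
void vext_model(uint8_t dst[16], const uint8_t src0[16],
                const uint8_t src1[16], int imm) {
  for (int b = 0; b < 16; b++) {
    int idx = imm + b;
    dst[b] = (idx < 16) ? src0[idx] : src1[idx - 16];
  }
}
```
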
| 2375 case kArmS8x16Shuffle: { | |
| 2376 Simd128Register dst = i.OutputSimd128Register(), | |
| 2377 src0 = i.InputSimd128Register(0), | |
| 2378 src1 = i.InputSimd128Register(1); | |
| 2379 DwVfpRegister table_base = src0.low(); | |
| 2380 int table_size = GetVtblTableSize(src0, src1); | |
| 2381 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg. | |
| 2382 int scratch_s_base = kScratchQuadReg.code() * 4; | |
| 2383 for (int j = 0; j < 4; j++) { | |
| 2384 int32_t four_lanes = i.InputInt32(2 + j); | |
| 2385 __ vmov(SwVfpRegister::from_code(scratch_s_base + j), | |
| 2386 bit_cast<float>(four_lanes)); | |
| 2387 } | |
| 2388 NeonListOperand table(table_base, table_size); | |
| 2389 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low()); | |
| 2390 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high()); | |
| 2391 __ vmov(dst, kScratchQuadReg); | |
| 2392 break; | |
| 2393 } | |
| 2311 case kArmS32x2Reverse: { | 2394 case kArmS32x2Reverse: { |
| 2312 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2395 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2313 break; | 2396 break; |
| 2314 } | 2397 } |
| 2315 case kArmS16x4Reverse: { | 2398 case kArmS16x4Reverse: { |
| 2316 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2399 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| 2317 break; | 2400 break; |
| 2318 } | 2401 } |
| 2319 case kArmS16x2Reverse: { | 2402 case kArmS16x2Reverse: { |
| 2320 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); | 2403 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| (...skipping 826 matching lines...) | |
| 3147 padding_size -= v8::internal::Assembler::kInstrSize; | 3230 padding_size -= v8::internal::Assembler::kInstrSize; |
| 3148 } | 3231 } |
| 3149 } | 3232 } |
| 3150 } | 3233 } |
| 3151 | 3234 |
| 3152 #undef __ | 3235 #undef __ |
| 3153 | 3236 |
| 3154 } // namespace compiler | 3237 } // namespace compiler |
| 3155 } // namespace internal | 3238 } // namespace internal |
| 3156 } // namespace v8 | 3239 } // namespace v8 |