Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(316)

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2856363003: [ARM] Implement irregular vector shuffles for SIMD. (Closed)
Patch Set: Review comments. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/arm/macro-assembler-arm.h" 7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h" 8 #include "src/assembler-inl.h"
9 #include "src/compilation-info.h" 9 #include "src/compilation-info.h"
10 #include "src/compiler/code-generator-impl.h" 10 #include "src/compiler/code-generator-impl.h"
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
340 return pl; 340 return pl;
341 case kNegative: 341 case kNegative:
342 return mi; 342 return mi;
343 default: 343 default:
344 break; 344 break;
345 } 345 }
346 UNREACHABLE(); 346 UNREACHABLE();
347 return kNoCondition; 347 return kNoCondition;
348 } 348 }
349 349
// Returns the size, in d-registers, of the NEON register list to use as the
// vtbl/vtbx table for a shuffle of src0 (unary) or src0:src1 (binary).
350 int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) {
351 // If unary shuffle (src0 == src1), the table is just src0 (2 d-registers).
352 if (src0.is(src1)) return 2;
353 // Binary shuffle: the table is src0 followed by src1. They must be
// consecutive q-registers so their d-register halves form a valid
// contiguous NEON register list.
354 DCHECK_EQ(src0.code() + 1, src1.code());
355 return 4; // 4 d-registers.
356 }
357
350 } // namespace 358 } // namespace
351 359
352 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ 360 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \
353 do { \ 361 do { \
354 auto result = i.Output##Type##Register(); \ 362 auto result = i.Output##Type##Register(); \
355 auto offset = i.InputRegister(0); \ 363 auto offset = i.InputRegister(0); \
356 if (instr->InputAt(1)->IsRegister()) { \ 364 if (instr->InputAt(1)->IsRegister()) { \
357 __ cmp(offset, i.InputRegister(1)); \ 365 __ cmp(offset, i.InputRegister(1)); \
358 } else { \ 366 } else { \
359 __ cmp(offset, i.InputImmediate(1)); \ 367 __ cmp(offset, i.InputImmediate(1)); \
(...skipping 1819 matching lines...) Expand 10 before | Expand all | Expand 10 after
2179 } 2187 }
2180 case kArmS32x4TransposeLeft: { 2188 case kArmS32x4TransposeLeft: {
2181 Simd128Register dst = i.OutputSimd128Register(), 2189 Simd128Register dst = i.OutputSimd128Register(),
2182 src1 = i.InputSimd128Register(1); 2190 src1 = i.InputSimd128Register(1);
2183 DCHECK(dst.is(i.InputSimd128Register(0))); 2191 DCHECK(dst.is(i.InputSimd128Register(0)));
2184 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7] 2192 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2185 __ vmov(kScratchQuadReg, src1); 2193 __ vmov(kScratchQuadReg, src1);
2186 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6] 2194 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6]
2187 break; 2195 break;
2188 } 2196 }
2197 case kArmS32x4Shuffle: {
2198 Simd128Register dst = i.OutputSimd128Register(),
2199 src0 = i.InputSimd128Register(0),
2200 src1 = i.InputSimd128Register(1);
2201 // Check for in-place shuffles: if dst aliases a source, copy that source
2202 // into the scratch register first so its original lanes stay readable
// while dst is overwritten lane by lane below.
// If dst == src0 == src1, then the shuffle is unary and we only use src0.
2203 if (dst.is(src0)) {
2204 __ vmov(kScratchQuadReg, src0);
2205 src0 = kScratchQuadReg;
2206 } else if (dst.is(src1)) {
2207 __ vmov(kScratchQuadReg, src1);
2208 src1 = kScratchQuadReg;
2209 }
2210 // Perform shuffle as a vmov per lane. A q-register with code n overlays
// s-registers [4n, 4n + 3], so scaling the register code by 4 yields the
// s-register index of lane 0.
2211 int dst_code = dst.code() * 4;
2212 int src0_code = src0.code() * 4;
2213 int src1_code = src1.code() * 4;
// The immediate packs four source lane indices, one per byte, lowest
// byte first.
2214 int32_t shuffle = i.InputInt32(2);
2215 for (int i = 0; i < 4; i++) {
2216 int lane = shuffle & 0x7;
// Lane indices 0-3 select from src0, 4-7 select from src1.
2217 int src_code = src0_code;
2218 if (lane >= 4) {
2219 src_code = src1_code;
2220 lane &= 0x3;
2221 }
2222 __ VmovExtended(dst_code + i, src_code + lane, kScratchReg);
2223 shuffle >>= 8;
2224 }
2225 break;
2226 }
2189 case kArmS32x4TransposeRight: { 2227 case kArmS32x4TransposeRight: {
2190 Simd128Register dst = i.OutputSimd128Register(), 2228 Simd128Register dst = i.OutputSimd128Register(),
2191 src1 = i.InputSimd128Register(1); 2229 src1 = i.InputSimd128Register(1);
2192 DCHECK(dst.is(i.InputSimd128Register(0))); 2230 DCHECK(dst.is(i.InputSimd128Register(0)));
2193 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft). 2231 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
2194 __ vmov(kScratchQuadReg, src1); 2232 __ vmov(kScratchQuadReg, src1);
2195 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7] 2233 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7]
2196 break; 2234 break;
2197 } 2235 }
2198 case kArmS16x8ZipLeft: { 2236 case kArmS16x8ZipLeft: {
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
2242 } 2280 }
2243 case kArmS16x8TransposeRight: { 2281 case kArmS16x8TransposeRight: {
2244 Simd128Register dst = i.OutputSimd128Register(), 2282 Simd128Register dst = i.OutputSimd128Register(),
2245 src1 = i.InputSimd128Register(1); 2283 src1 = i.InputSimd128Register(1);
2246 DCHECK(dst.is(i.InputSimd128Register(0))); 2284 DCHECK(dst.is(i.InputSimd128Register(0)));
2247 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). 2285 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2248 __ vmov(kScratchQuadReg, src1); 2286 __ vmov(kScratchQuadReg, src1);
2249 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] 2287 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
2250 break; 2288 break;
2251 } 2289 }
2290 case kArmS16x8Shuffle: {
2291 Simd128Register dst = i.OutputSimd128Register(),
2292 src0 = i.InputSimd128Register(0),
2293 src1 = i.InputSimd128Register(1);
// vtbl reads the table from src0 (unary) or src0:src1 (binary); see
// GetVtblTableSize for the consecutive-register requirement.
2294 DwVfpRegister table_base = src0.low();
2295 int table_size = GetVtblTableSize(src0, src1);
2296 // Convert the shuffle lane masks to byte masks in kScratchQuadReg: each
// 16-bit lane index w expands to the byte-index pair (2w, 2w + 1).
2297 int scratch_s_base = kScratchQuadReg.code() * 4;
2298 for (int j = 0; j < 2; j++) {
// Each input immediate packs four lane indices, one per byte.
2299 int32_t four_lanes = i.InputInt32(2 + j);
2300 for (int k = 0; k < 2; k++) {
2301 uint8_t w0 = (four_lanes & 0xF) * kShortSize;
2302 four_lanes >>= 8;
2303 uint8_t w1 = (four_lanes & 0xF) * kShortSize;
2304 four_lanes >>= 8;
2305 int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
2306 // Ensure byte indices are in [0, 31] so masks are never NaNs.
// NOTE(review): this mask appears redundant/misplaced here — each lane
// is already bounded by the `& 0xF` extraction above, and in the
// S8x16 case the equivalent mask is applied to the value actually
// moved into the register. Confirm against the byte-shuffle path.
2307 four_lanes &= 0x1F1F1F1F;
2308 __ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k),
2309 bit_cast<float>(mask));
2310 }
2311 }
2312 NeonListOperand table(table_base, table_size);
// If dst does not alias a table source, shuffle directly into dst;
// otherwise shuffle into the scratch register first so the table is not
// clobbered mid-shuffle, then move the result into dst.
2313 if (!dst.is(src0) && !dst.is(src1)) {
2314 __ vtbl(dst.low(), table, kScratchQuadReg.low());
2315 __ vtbl(dst.high(), table, kScratchQuadReg.high());
2316 } else {
2317 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());
2318 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());
2319 __ vmov(dst, kScratchQuadReg);
2320 }
2321 break;
2322 }
2252 case kArmS8x16ZipLeft: { 2323 case kArmS8x16ZipLeft: {
2253 Simd128Register dst = i.OutputSimd128Register(), 2324 Simd128Register dst = i.OutputSimd128Register(),
2254 src1 = i.InputSimd128Register(1); 2325 src1 = i.InputSimd128Register(1);
2255 DCHECK(dst.is(i.InputSimd128Register(0))); 2326 DCHECK(dst.is(i.InputSimd128Register(0)));
2256 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] 2327 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2257 __ vmov(dst.high(), src1.low()); 2328 __ vmov(dst.high(), src1.low());
2258 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] 2329 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
2259 break; 2330 break;
2260 } 2331 }
2261 case kArmS8x16ZipRight: { 2332 case kArmS8x16ZipRight: {
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
2301 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped). 2372 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2302 __ vmov(kScratchQuadReg, src1); 2373 __ vmov(kScratchQuadReg, src1);
2303 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31] 2374 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31]
2304 break; 2375 break;
2305 } 2376 }
2306 case kArmS8x16Concat: { 2377 case kArmS8x16Concat: {
2307 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), 2378 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
2308 i.InputSimd128Register(1), i.InputInt4(2)); 2379 i.InputSimd128Register(1), i.InputInt4(2));
2309 break; 2380 break;
2310 } 2381 }
2382 case kArmS8x16Shuffle: {
2383 Simd128Register dst = i.OutputSimd128Register(),
2384 src0 = i.InputSimd128Register(0),
2385 src1 = i.InputSimd128Register(1);
// vtbl reads the table from src0 (unary) or src0:src1 (binary); see
// GetVtblTableSize for the consecutive-register requirement.
2386 DwVfpRegister table_base = src0.low();
2387 int table_size = GetVtblTableSize(src0, src1);
2388 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.
2389 int scratch_s_base = kScratchQuadReg.code() * 4;
2390 for (int j = 0; j < 4; j++) {
// Each input immediate packs four byte indices, one per byte.
2391 int32_t four_lanes = i.InputInt32(2 + j);
2392 // Ensure byte indices are in [0, 31] so masks are never NaNs.
2393 four_lanes &= 0x1F1F1F1F;
2394 __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
2395 bit_cast<float>(four_lanes));
2396 }
2397 NeonListOperand table(table_base, table_size);
// If dst does not alias a table source, shuffle directly into dst;
// otherwise shuffle into the scratch register first so the table is not
// clobbered mid-shuffle, then move the result into dst.
2398 if (!dst.is(src0) && !dst.is(src1)) {
2399 __ vtbl(dst.low(), table, kScratchQuadReg.low());
2400 __ vtbl(dst.high(), table, kScratchQuadReg.high());
2401 } else {
2402 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());
2403 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());
2404 __ vmov(dst, kScratchQuadReg);
2405 }
2406 break;
2407 }
2311 case kArmS32x2Reverse: { 2408 case kArmS32x2Reverse: {
2312 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); 2409 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2313 break; 2410 break;
2314 } 2411 }
2315 case kArmS16x4Reverse: { 2412 case kArmS16x4Reverse: {
2316 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); 2413 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2317 break; 2414 break;
2318 } 2415 }
2319 case kArmS16x2Reverse: { 2416 case kArmS16x2Reverse: {
2320 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); 2417 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
(...skipping 826 matching lines...) Expand 10 before | Expand all | Expand 10 after
3147 padding_size -= v8::internal::Assembler::kInstrSize; 3244 padding_size -= v8::internal::Assembler::kInstrSize;
3148 } 3245 }
3149 } 3246 }
3150 } 3247 }
3151 3248
3152 #undef __ 3249 #undef __
3153 3250
3154 } // namespace compiler 3251 } // namespace compiler
3155 } // namespace internal 3252 } // namespace internal
3156 } // namespace v8 3253 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698