Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(406)

Unified Diff: src/compiler/arm/code-generator-arm.cc

Issue 2801183002: [WASM SIMD] Implement primitive shuffles. (Closed)
Patch Set: Fix ARM release build. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/compiler/arm/code-generator-arm.cc
diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc
index bd2a942bcc249df170989cde9bdde65cfc771ae8..c25aaf3bccdf72140e8feec05912002427848064 100644
--- a/src/compiler/arm/code-generator-arm.cc
+++ b/src/compiler/arm/code-generator-arm.cc
@@ -2028,6 +2028,198 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(2));
break;
}
+ case kArmS32x4ZipLeft: {
martyn.capewell 2017/04/10 16:46:55 It might be helpful to add a comment to each of th
bbudge 2017/04/10 23:26:47 Here's what this is supposed to do: src0 = 0,1,2,3
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(dst.high(), src1.low());
martyn.capewell 2017/04/10 16:46:55 If the assembler supports it, you could use: vdu
+ __ vzip(Neon32, dst.low(), dst.high());
+ break;
+ }
+ case kArmS32x4ZipRight: {
martyn.capewell 2017/04/10 16:46:55 vdup.32 dst.low, src.high[0] vdup.32 dst.high, s
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(dst.low(), src1.high());
+ __ vzip(Neon32, dst.low(), dst.high());
+ break;
+ }
+ case kArmS32x4UnzipLeft: {
martyn.capewell 2017/04/10 16:46:55 vmovn.i64 dst.low, src vmov dst.high, dst.low
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vuzp(Neon32, dst, kScratchQuadReg);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS32x4UnzipRight: {
martyn.capewell 2017/04/10 16:46:55 vshrn.i64 dst.low, src, #32 vmov dst.high, dst.l
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vuzp(Neon32, kScratchQuadReg, dst);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS32x4TransposeLeft: {
martyn.capewell 2017/04/10 16:46:55 vdup.32 dst.low, src.low[0] vdup.32 dst.high, sr
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vtrn(Neon32, dst, kScratchQuadReg);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS32x4TransposeRight: {
martyn.capewell 2017/04/10 16:46:56 vdup dst.low, src.low[1] vdup dst.high, src.high
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vtrn(Neon32, kScratchQuadReg, dst);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS16x8ZipLeft: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(dst.high(), src1.low());
+ __ vzip(Neon16, dst.low(), dst.high());
+ break;
+ }
+ case kArmS16x8ZipRight: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(dst.low(), src1.high());
+ __ vzip(Neon16, dst.low(), dst.high());
+ break;
+ }
+ case kArmS16x8UnzipLeft: {
martyn.capewell 2017/04/10 16:46:55 vmovn.i32 dst.low, src vmov dst.high, dst.low
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vuzp(Neon16, dst, kScratchQuadReg);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS16x8UnzipRight: {
martyn.capewell 2017/04/10 16:46:55 vshrn.i32 dst.low, src, #16 vmov dst.high, dst.l
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vuzp(Neon16, kScratchQuadReg, dst);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS16x8TransposeLeft: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vtrn(Neon16, dst, kScratchQuadReg);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS16x8TransposeRight: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vtrn(Neon16, kScratchQuadReg, dst);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS8x16ZipLeft: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(dst.high(), src1.low());
+ __ vzip(Neon8, dst.low(), dst.high());
+ break;
+ }
+ case kArmS8x16ZipRight: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(dst.low(), src1.high());
+ __ vzip(Neon8, dst.low(), dst.high());
+ break;
+ }
+ case kArmS8x16UnzipLeft: {
martyn.capewell 2017/04/10 16:46:55 vmovn.i16 dst.low, src vmov dst.high, dst.low
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vuzp(Neon8, dst, kScratchQuadReg);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS8x16UnzipRight: {
martyn.capewell 2017/04/10 16:46:55 vshrn.i16, dst.low, src, #8 vmov dst.high, dst.l
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vuzp(Neon8, kScratchQuadReg, dst);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS8x16TransposeLeft: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vtrn(Neon8, dst, kScratchQuadReg);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS8x16TransposeRight: {
+ Simd128Register dst = i.OutputSimd128Register(),
+ src1 = i.InputSimd128Register(1);
+ DCHECK(dst.is(i.InputSimd128Register(0)));
+ __ vmov(kScratchQuadReg, src1);
+ __ vtrn(Neon8, kScratchQuadReg, dst);
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
+ break;
+ }
+ case kArmS8x16Concat: {
+ __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
+ i.InputSimd128Register(1), i.InputInt4(2));
+ break;
+ }
+ case kArmS64x2Reverse: {
martyn.capewell 2017/04/10 16:46:55 vext(dst, src, src, 8);
bbudge 2017/04/10 23:26:47 Nice. However, since we expose VEXT (as S8x16Conca
+ Simd128Register dst = i.OutputSimd128Register(),
+ src = i.InputSimd128Register(0);
+ if (!dst.is(src)) __ vmov(dst, src);
+ __ vswp(dst.low(), dst.high());
+ break;
+ }
+ case kArmS32x2Reverse: {
+ __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kArmS16x4Reverse: {
+ __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kArmS16x2Reverse: {
+ __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kArmS8x8Reverse: {
+ __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kArmS8x4Reverse: {
+ __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
+ case kArmS8x2Reverse: {
+ __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
+ break;
+ }
case kArmS1x4AnyTrue: {
const QwNeonRegister& src = i.InputSimd128Register(0);
__ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
« no previous file with comments | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698