Index: src/compiler/arm/code-generator-arm.cc |
diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc |
index bd2a942bcc249df170989cde9bdde65cfc771ae8..c25aaf3bccdf72140e8feec05912002427848064 100644 |
--- a/src/compiler/arm/code-generator-arm.cc |
+++ b/src/compiler/arm/code-generator-arm.cc |
@@ -2028,6 +2028,198 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
i.InputSimd128Register(2)); |
break; |
} |
+ case kArmS32x4ZipLeft: { |
martyn.capewell
2017/04/10 16:46:55
It might be helpful to add a comment to each of th
bbudge
2017/04/10 23:26:47
Here's what this is supposed to do:
src0 = 0,1,2,3
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(dst.high(), src1.low()); |
martyn.capewell
2017/04/10 16:46:55
If the assembler supports it, you could use:
vdu
|
+ __ vzip(Neon32, dst.low(), dst.high()); |
+ break; |
+ } |
+ case kArmS32x4ZipRight: { |
martyn.capewell
2017/04/10 16:46:55
vdup.32 dst.low, src.high[0]
vdup.32 dst.high, s
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(dst.low(), src1.high()); |
+ __ vzip(Neon32, dst.low(), dst.high()); |
+ break; |
+ } |
+ case kArmS32x4UnzipLeft: { |
martyn.capewell
2017/04/10 16:46:55
vmovn.i64 dst.low, src
vmov dst.high, dst.low
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vuzp(Neon32, dst, kScratchQuadReg); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS32x4UnzipRight: { |
martyn.capewell
2017/04/10 16:46:55
vshrn.i64 dst.low, src, #32
vmov dst.high, dst.l
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vuzp(Neon32, kScratchQuadReg, dst); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS32x4TransposeLeft: { |
martyn.capewell
2017/04/10 16:46:55
vdup.32 dst.low, src.low[0]
vdup.32 dst.high, sr
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vtrn(Neon32, dst, kScratchQuadReg); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS32x4TransposeRight: { |
martyn.capewell
2017/04/10 16:46:56
vdup dst.low, src.low[1]
vdup dst.high, src.high
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vtrn(Neon32, kScratchQuadReg, dst); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS16x8ZipLeft: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(dst.high(), src1.low()); |
+ __ vzip(Neon16, dst.low(), dst.high()); |
+ break; |
+ } |
+ case kArmS16x8ZipRight: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(dst.low(), src1.high()); |
+ __ vzip(Neon16, dst.low(), dst.high()); |
+ break; |
+ } |
+ case kArmS16x8UnzipLeft: { |
martyn.capewell
2017/04/10 16:46:55
vmovn.i32 dst.low, src
vmov dst.high, dst.low
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vuzp(Neon16, dst, kScratchQuadReg); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS16x8UnzipRight: { |
martyn.capewell
2017/04/10 16:46:55
vshrn.i32 dst.low, src, #16
vmov dst.high, dst.l
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vuzp(Neon16, kScratchQuadReg, dst); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS16x8TransposeLeft: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vtrn(Neon16, dst, kScratchQuadReg); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS16x8TransposeRight: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vtrn(Neon16, kScratchQuadReg, dst); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS8x16ZipLeft: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(dst.high(), src1.low()); |
+ __ vzip(Neon8, dst.low(), dst.high()); |
+ break; |
+ } |
+ case kArmS8x16ZipRight: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(dst.low(), src1.high()); |
+ __ vzip(Neon8, dst.low(), dst.high()); |
+ break; |
+ } |
+ case kArmS8x16UnzipLeft: { |
martyn.capewell
2017/04/10 16:46:55
vmovn.i16 dst.low, src
vmov dst.high, dst.low
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vuzp(Neon8, dst, kScratchQuadReg); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS8x16UnzipRight: { |
martyn.capewell
2017/04/10 16:46:55
vshrn.i16 dst.low, src, #8
vmov dst.high, dst.l
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vuzp(Neon8, kScratchQuadReg, dst); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS8x16TransposeLeft: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vtrn(Neon8, dst, kScratchQuadReg); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS8x16TransposeRight: { |
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src1 = i.InputSimd128Register(1); |
+ DCHECK(dst.is(i.InputSimd128Register(0))); |
+ __ vmov(kScratchQuadReg, src1); |
+ __ vtrn(Neon8, kScratchQuadReg, dst); |
+ __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero); |
+ break; |
+ } |
+ case kArmS8x16Concat: { |
+ __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), |
+ i.InputSimd128Register(1), i.InputInt4(2)); |
+ break; |
+ } |
+ case kArmS64x2Reverse: { |
martyn.capewell
2017/04/10 16:46:55
vext(dst, src, src, 8);
bbudge
2017/04/10 23:26:47
Nice. However, since we expose VEXT (as S8x16Conca
|
+ Simd128Register dst = i.OutputSimd128Register(), |
+ src = i.InputSimd128Register(0); |
+ if (!dst.is(src)) __ vmov(dst, src); |
+ __ vswp(dst.low(), dst.high()); |
+ break; |
+ } |
+ case kArmS32x2Reverse: { |
+ __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
+ break; |
+ } |
+ case kArmS16x4Reverse: { |
+ __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
+ break; |
+ } |
+ case kArmS16x2Reverse: { |
+ __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
+ break; |
+ } |
+ case kArmS8x8Reverse: { |
+ __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
+ break; |
+ } |
+ case kArmS8x4Reverse: { |
+ __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
+ break; |
+ } |
+ case kArmS8x2Reverse: { |
+ __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0)); |
+ break; |
+ } |
case kArmS1x4AnyTrue: { |
const QwNeonRegister& src = i.InputSimd128Register(0); |
__ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); |