Index: src/compiler/x64/code-generator-x64.cc |
diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc |
index 2872aeac194dfb5fb3269751cc526d58fdf2d1bd..4b961f53dc7987a0560da616ad610d2c9e580119 100644 |
--- a/src/compiler/x64/code-generator-x64.cc |
+++ b/src/compiler/x64/code-generator-x64.cc |
@@ -2186,6 +2186,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
} |
break; |
} |
+ case kX64I32x4Neg: { |
+   // Negate each 32-bit lane by computing 0 - input. |
+   XMMRegister result = i.OutputSimd128Register(); |
+   XMMRegister operand = i.InputSimd128Register(0); |
+   if (result.is(operand)) { |
+     // Zeroing the output would wipe the aliased input, so subtract a |
+     // copy held in the scratch register instead. |
+     __ movaps(kScratchDoubleReg, result); |
+     operand = kScratchDoubleReg; |
+   } |
+   __ pxor(result, result); |
+   __ psubd(result, operand); |
+   break; |
+ } |
case kX64I32x4Shl: { |
__ pslld(i.OutputSimd128Register(), i.InputInt8(1)); |
break; |
@@ -2232,6 +2245,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
__ pxor(i.OutputSimd128Register(), kScratchDoubleReg); |
break; |
} |
+ case kX64I32x4GtS: { |
+   // pcmpgtd: each 32-bit lane of the output register becomes all ones |
+   // where it is greater (signed) than the same lane of input 1, else 0. |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   __ pcmpgtd(lhs, rhs); |
+   break; |
+ } |
+ case kX64I32x4GeS: { /* Signed per-lane dst >= input 1, built as NOT(input 1 > dst). */ |
+ XMMRegister dst = i.OutputSimd128Register(); |
+ __ movaps(kScratchDoubleReg, i.InputSimd128Register(1)); /* scratch = input 1 */ |
bbudge
2017/06/22 17:30:10
Can you use pminsd here to save 1 instruction?
gdeepti
2017/06/27 20:47:42
Done.
|
+ __ pcmpgtd(kScratchDoubleReg, dst); /* scratch = (input 1 > dst), signed, per lane */ |
+ __ pcmpeqd(dst, dst); /* dst = all ones */ |
+ __ pxor(dst, kScratchDoubleReg); /* dst = ~(input 1 > dst) */ |
+ break; |
+ } |
case kX64I32x4ShrU: { |
__ psrld(i.OutputSimd128Register(), i.InputInt8(1)); |
break; |
@@ -2246,6 +2271,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
__ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
break; |
} |
+ case kX64I32x4GtU: { |
+   // Unsigned per-lane dst > src. pcmpgtd only compares signed lanes, so |
+   // compute NOT(dst <= src), where dst <= src is max(dst, src) == src. |
+   // This sequence never writes to src, so it is also correct when dst |
+   // and src are the same register (the result is then all zeros). |
+   // pmaxud requires SSE4.1, like the pminud used by kX64I32x4GeU. |
+   CpuFeatureScope sse_scope(masm(), SSE4_1); |
+   XMMRegister dst = i.OutputSimd128Register(); |
+   XMMRegister src = i.InputSimd128Register(1); |
+   __ pmaxud(dst, src); |
+   // dst = (max(dst, src) == src), i.e. dst <= src. |
+   __ pcmpeqd(dst, src); |
+   // Invert with an all-ones mask to turn <= into >. |
+   __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
+   __ pxor(dst, kScratchDoubleReg); |
gdeepti
2017/06/21 03:44:01
This is similar to the code Clang generates for un
bbudge
2017/06/22 16:29:49
I think we'll have to support S128 constants at so
gdeepti
2017/06/22 17:26:48
I like the simplicity of the pminud/pmaxud approac
zvi
2017/06/24 23:04:09
I agree that the pmax/pmin option is preferable fo
gdeepti
2017/06/27 20:47:42
Bill:
This returns true for the == case as well,
zvi
2017/06/27 22:30:32
I didn't mean to say you are selecting MMX instruc
gdeepti
2017/06/27 23:23:37
Sorry for the confusion, I remembered stale state
|
+   break; |
+ } |
+ case kX64I32x4GeU: { |
+   // Unsigned per-lane lhs >= rhs, expressed as min(lhs, rhs) == rhs. |
+   CpuFeatureScope sse41_scope(masm(), SSE4_1); |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   __ pminud(lhs, rhs); |
+   __ pcmpeqd(lhs, rhs); |
+   break; |
+ } |
case kX64S128Zero: { |
XMMRegister dst = i.OutputSimd128Register(); |
__ xorps(dst, dst); |
@@ -2276,6 +2322,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
} |
break; |
} |
+ case kX64I16x8Neg: { |
+   // Negate each 16-bit lane by computing 0 - input. |
+   XMMRegister result = i.OutputSimd128Register(); |
+   XMMRegister operand = i.InputSimd128Register(0); |
+   if (result.is(operand)) { |
+     // The output aliases the input: keep a copy in the scratch register |
+     // before the output is cleared, and subtract the copy. |
+     __ movaps(kScratchDoubleReg, result); |
+     operand = kScratchDoubleReg; |
+   } |
+   __ pxor(result, result); |
+   __ psubw(result, operand); |
+   break; |
+ } |
case kX64I16x8Shl: { |
__ psllw(i.OutputSimd128Register(), i.InputInt8(1)); |
break; |
@@ -2330,6 +2389,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
__ pxor(i.OutputSimd128Register(), kScratchDoubleReg); |
break; |
} |
+ case kX64I16x8GtS: { |
+   // pcmpgtw: each 16-bit lane of the output register becomes all ones |
+   // where it is greater (signed) than the same lane of input 1, else 0. |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   __ pcmpgtw(lhs, rhs); |
+   break; |
+ } |
+ case kX64I16x8GeS: { |
+   // Signed per-lane lhs >= rhs, built as NOT(rhs > lhs). |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   // scratch = (rhs > lhs), signed 16-bit lanes. |
+   __ movaps(kScratchDoubleReg, rhs); |
+   __ pcmpgtw(kScratchDoubleReg, lhs); |
+   // lhs = all ones, then flip the lanes where rhs > lhs. |
+   __ pcmpeqd(lhs, lhs); |
+   __ pxor(lhs, kScratchDoubleReg); |
+   break; |
+ } |
case kX64I16x8ShrU: { |
__ psrlw(i.OutputSimd128Register(), i.InputInt8(1)); |
break; |
@@ -2352,6 +2423,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
__ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
break; |
} |
+ case kX64I16x8GtU: { |
+   // Unsigned per-lane dst > src. pcmpgtw only compares signed lanes, so |
+   // compute NOT(dst <= src), where dst <= src is max(dst, src) == src. |
+   // This sequence never writes to src, so it is also correct when dst |
+   // and src are the same register (the result is then all zeros). |
+   // pmaxuw requires SSE4.1, like the pminuw used by kX64I16x8GeU. |
+   CpuFeatureScope sse_scope(masm(), SSE4_1); |
+   XMMRegister dst = i.OutputSimd128Register(); |
+   XMMRegister src = i.InputSimd128Register(1); |
+   __ pmaxuw(dst, src); |
+   // dst = (max(dst, src) == src), i.e. dst <= src. |
+   __ pcmpeqw(dst, src); |
+   // Invert with an all-ones mask to turn <= into >. |
+   __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
+   __ pxor(dst, kScratchDoubleReg); |
+   break; |
+ } |
+ case kX64I16x8GeU: { |
+   // Unsigned per-lane lhs >= rhs, expressed as min(lhs, rhs) == rhs. |
+   CpuFeatureScope sse41_scope(masm(), SSE4_1); |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   __ pminuw(lhs, rhs); |
+   __ pcmpeqw(lhs, rhs); |
+   break; |
+ } |
case kX64I8x16Splat: { |
CpuFeatureScope sse_scope(masm(), SSSE3); |
XMMRegister dst = i.OutputSimd128Register(); |
@@ -2377,6 +2469,19 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
} |
break; |
} |
+ case kX64I8x16Neg: { |
+   // Negate each 8-bit lane by computing 0 - input. |
+   XMMRegister result = i.OutputSimd128Register(); |
+   XMMRegister operand = i.InputSimd128Register(0); |
+   if (result.is(operand)) { |
+     // The output aliases the input: keep a copy in the scratch register |
+     // before the output is cleared, and subtract the copy. |
+     __ movaps(kScratchDoubleReg, result); |
+     operand = kScratchDoubleReg; |
+   } |
+   __ pxor(result, result); |
+   __ psubb(result, operand); |
+   break; |
+ } |
case kX64I8x16Add: { |
__ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
break; |
@@ -2413,6 +2518,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
__ pxor(i.OutputSimd128Register(), kScratchDoubleReg); |
break; |
} |
+ case kX64I8x16GtS: { |
+   // pcmpgtb: each 8-bit lane of the output register becomes all ones |
+   // where it is greater (signed) than the same lane of input 1, else 0. |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   __ pcmpgtb(lhs, rhs); |
+   break; |
+ } |
+ case kX64I8x16GeS: { |
+   // Signed per-lane lhs >= rhs, built as NOT(rhs > lhs). |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   // scratch = (rhs > lhs), signed 8-bit lanes. |
+   __ movaps(kScratchDoubleReg, rhs); |
+   __ pcmpgtb(kScratchDoubleReg, lhs); |
+   // lhs = all ones, then flip the lanes where rhs > lhs. |
+   __ pcmpeqd(lhs, lhs); |
+   __ pxor(lhs, kScratchDoubleReg); |
+   break; |
+ } |
case kX64I8x16AddSaturateU: { |
__ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
break; |
@@ -2431,6 +2548,27 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
__ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
break; |
} |
+ case kX64I8x16GtU: { |
+   // Unsigned per-lane dst > src. pcmpgtb only compares signed lanes, so |
+   // compute NOT(dst <= src), where dst <= src is max(dst, src) == src. |
+   // This sequence never writes to src, so it is also correct when dst |
+   // and src are the same register (the result is then all zeros). |
+   // pmaxub is an SSE2 instruction, so no CPU feature scope is opened. |
+   XMMRegister dst = i.OutputSimd128Register(); |
+   XMMRegister src = i.InputSimd128Register(1); |
+   __ pmaxub(dst, src); |
+   // dst = (max(dst, src) == src), i.e. dst <= src. |
+   __ pcmpeqb(dst, src); |
+   // Invert with an all-ones mask to turn <= into >. |
+   __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); |
+   __ pxor(dst, kScratchDoubleReg); |
+   break; |
+ } |
+ case kX64I8x16GeU: { |
+   // Unsigned per-lane lhs >= rhs, expressed as min(lhs, rhs) == rhs. |
+   // NOTE(review): pminub is an SSE2 instruction, so the SSE4_1 scope |
+   // looks unnecessary here - confirm against the assembler before |
+   // removing it. |
+   CpuFeatureScope sse41_scope(masm(), SSE4_1); |
+   XMMRegister lhs = i.OutputSimd128Register(); |
+   XMMRegister rhs = i.InputSimd128Register(1); |
+   __ pminub(lhs, rhs); |
+   __ pcmpeqb(lhs, rhs); |
+   break; |
+ } |
case kX64S128And: { |
__ pand(i.OutputSimd128Register(), i.InputSimd128Register(1)); |
break; |
@@ -2445,8 +2583,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
} |
case kX64S128Not: { |
XMMRegister dst = i.OutputSimd128Register(); |
- __ pcmpeqd(dst, dst); |
- __ pxor(dst, i.InputSimd128Register(1)); |
+ // Bitwise NOT of input 0, computed as all-ones XOR input. |
+ XMMRegister operand = i.InputSimd128Register(0); |
+ if (dst.is(operand)) { |
+   // Filling dst with ones would destroy the aliased input, so XOR |
+   // against a copy held in the scratch register instead. |
+   __ movaps(kScratchDoubleReg, dst); |
+   operand = kScratchDoubleReg; |
+ } |
+ __ pcmpeqd(dst, dst); |
+ __ pxor(dst, operand); |
break; |
} |
case kX64S128Select: { |