Index: src/compiler/x64/code-generator-x64.cc
diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc
index fb5fbd6cbe47c4e2d3dcaf25039bb947b6ad5f79..ab994a32e49b3b4f342d8d40a197f662d44a856b 100644
--- a/src/compiler/x64/code-generator-x64.cc
+++ b/src/compiler/x64/code-generator-x64.cc
@@ -2143,10 +2143,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64Int32x4Splat: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
       XMMRegister dst = i.OutputSimd128Register();
-      __ Movd(dst, i.InputRegister(0));
-      __ shufps(dst, dst, 0x0);
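+      // Move the 32-bit value into the low lane, then use pshufd to
+      // broadcast it to all four lanes.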
+      __ movd(dst, i.InputRegister(0));
+      __ pshufd(dst, dst, 0x0);
       break;
     }
     case kX64Int32x4ExtractLane: {
@@ -2165,17 +2164,70 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64Int32x4Add: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
     case kX64Int32x4Sub: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
-    case kX64Simd128Zero: {
+    case kX64Int32x4Mul: {
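+      // pmulld is an SSE4.1 instruction, so the feature scope is still
+      // required for this case.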
       CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Int32x4Min: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Int32x4Max: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Uint32x4Min: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Uint32x4Max: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Int32x4Equal: {
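+      // pcmpeqd sets each lane to all ones on equality, all zeros otherwise.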
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Int32x4NotEqual: {
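+      // There is no packed "not equal" compare: test for equality, then
+      // invert the result by XORing with all ones.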
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
+    case kX64Int32x4ShiftLeftByScalar: {
+      __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
+      break;
+    }
+    case kX64Int32x4ShiftRightByScalar: {
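+      // psrad is an arithmetic shift: the sign bit is replicated into the
+      // vacated bits.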
+      __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
+      break;
+    }
+    case kX64Uint32x4ShiftRightByScalar: {
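+      // psrld is a logical shift: zeros are shifted in.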
+      __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
+      break;
+    }
+    case kX64Simd32x4Select: {
+      // The select mask (input 0) is stored in dst.
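+      // Bitwise select: dst = (mask & (src1 ^ src2)) ^ src2, so lanes whose
+      // mask bits are set take src1 and the rest take src2.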
+      XMMRegister dst = i.OutputSimd128Register();
+      __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+      __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
+      __ andps(dst, kScratchDoubleReg);
+      __ xorps(dst, i.InputSimd128Register(2));
Jing (2017/03/15 08:20:33): Just for your information that blendvps in SSE4_1 …
gdeepti (2017/03/15 13:23:02): Thanks for the pointer, as I understand it this is …
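A rough sketch of the blendvps alternative mentioned above (assuming the
assembler exposes SSE4.1 blendvps, which reads its mask implicitly from xmm0
and selects on the sign bit of each lane, so it only matches this select when
the mask lanes are all ones or all zeros):

      CpuFeatureScope sse_scope(masm(), SSE4_1);
      __ movaps(xmm0, i.InputSimd128Register(0));   // mask; blendvps reads xmm0
      __ movaps(dst, i.InputSimd128Register(2));    // start from the src2 lanes
      __ blendvps(dst, i.InputSimd128Register(1));  // take src1 where mask is set

The fixed xmm0 operand would add a register constraint in the instruction
selector, which the xor/and/xor sequence avoids.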
+      break;
+    }
+    case kX64Simd128Zero: {
       XMMRegister dst = i.OutputSimd128Register();
       __ xorps(dst, dst);
       break;