Index: src/compiler/x64/code-generator-x64.cc
diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc
index 8f9b69d2c8b033f7654bbc6f53bd4b5d23934667..c73b03bf20c3c70fee082c42483e857434a7a0d1 100644
--- a/src/compiler/x64/code-generator-x64.cc
+++ b/src/compiler/x64/code-generator-x64.cc
@@ -2155,10 +2155,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64Int32x4Splat: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
       XMMRegister dst = i.OutputSimd128Register();
       __ Movd(dst, i.InputRegister(0));
bbudge 2017/02/28 19:34:39: Still might use AVX.
gdeepti 2017/03/13 20:37:52: Fixed now.
-      __ shufps(dst, dst, 0x0);
+      __ pshufd(dst, dst, 0x0);
       break;
     }
     case kX64Int32x4ExtractLane: {
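With the SSE4_1 scope gone, Splat relies only on SSE2: Movd puts the GPR value in lane 0, and pshufd with shuffle mask 0x0 broadcasts lane 0 to all four lanes. A minimal sketch of the same idiom in SSE2 intrinsics (helper name is ours, not CL code):

    #include <emmintrin.h>  // SSE2

    // Broadcast x to all four i32 lanes: movd + pshufd 0x0.
    static __m128i splat_i32(int x) {
      __m128i v = _mm_cvtsi32_si128(x);  // movd: lane 0 = x, others zeroed
      return _mm_shuffle_epi32(v, 0x0);  // pshufd 0x0: replicate lane 0
    }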
@@ -2177,17 +2176,68 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kX64Int32x4Add: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
     case kX64Int32x4Sub: {
-      CpuFeatureScope sse_scope(masm(), SSE4_1);
       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
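paddd and psubd are baseline SSE2 on x64, so the SSE4_1 feature scopes above were unnecessary and are dropped here. For reference, the lane-wise semantics in intrinsics (a sketch, not CL code):

    #include <emmintrin.h>  // SSE2

    // Lane-wise i32 add/sub; overflow wraps, as the SIMD ops require.
    static __m128i add_i32x4(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
    static __m128i sub_i32x4(__m128i a, __m128i b) { return _mm_sub_epi32(a, b); }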
-    case kX64Simd128Zero: {
+    case kX64Int32x4Mul: {
       CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Int32x4Min: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Int32x4Max: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Uint32x4Min: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Uint32x4Max: {
+      CpuFeatureScope sse_scope(masm(), SSE4_1);
+      __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
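In contrast to add/sub, pmulld and the four min/max variants are SSE4.1 instructions, so these new cases do need the CpuFeatureScope. Signed and unsigned lanes get distinct opcodes because the comparisons differ. Intrinsic equivalents (sketch only, helper names are ours):

    #include <smmintrin.h>  // SSE4.1

    static __m128i mul_i32x4(__m128i a, __m128i b) { return _mm_mullo_epi32(a, b); }  // pmulld
    static __m128i min_i32x4(__m128i a, __m128i b) { return _mm_min_epi32(a, b); }    // pminsd, signed
    static __m128i min_u32x4(__m128i a, __m128i b) { return _mm_min_epu32(a, b); }    // pminud, unsigned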
+    case kX64Int32x4Equal: {
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
+    case kX64Int32x4NotEqual: {
+      __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
+      break;
+    }
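There is no pcmpneqd, so NotEqual is built as NOT(Equal): pcmpeqd computes the equality mask, pcmpeqd of the scratch register with itself materializes all ones, and pxor flips every bit. The same logic as an intrinsics sketch (helper name is ours):

    #include <emmintrin.h>  // SSE2

    // ne(a, b): ~0 in each lane where a != b, else 0.
    static __m128i ne_i32x4(__m128i a, __m128i b) {
      __m128i eq   = _mm_cmpeq_epi32(a, b);    // pcmpeqd: equality mask
      __m128i ones = _mm_cmpeq_epi32(eq, eq);  // x == x everywhere: all ones
      return _mm_xor_si128(eq, ones);          // pxor against ~0 is bitwise NOT
    }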
+    case kX64Int32x4ShiftLeftByScalar: {
+      __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
+      break;
+    }
+    case kX64Int32x4ShiftRightByScalar: {
+      __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
+      break;
+    }
+    case kX64Uint32x4ShiftRightByScalar: {
+      __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
+      break;
+    }
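All three shifts take the count as an immediate (InputInt8(1)). Only the right shift needs a signed/unsigned split: psrad shifts copies of the sign bit in, psrld shifts zeros in. Sketch (count fixed at 3 to use the immediate forms; helper names are ours):

    #include <emmintrin.h>  // SSE2

    static __m128i shl_i32x4(__m128i a) { return _mm_slli_epi32(a, 3); }  // pslld: zero-fill low bits
    static __m128i sar_i32x4(__m128i a) { return _mm_srai_epi32(a, 3); }  // psrad: arithmetic, keeps sign
    static __m128i shr_u32x4(__m128i a) { return _mm_srli_epi32(a, 3); }  // psrld: logical, zero-fill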
+    case kX64Simd32x4Select: {
+      // Mask used here is stored in dst.
+      XMMRegister dst = i.OutputSimd128Register();
bbudge 2017/02/28 19:34:39: A comment here that dst contains the mask initially
gdeepti 2017/03/13 20:37:52: Done.
+      __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
+      __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
bbudge 2017/02/28 19:34:39: Maybe use the scratch FP register for this? Otherwise …
gdeepti 2017/03/13 20:37:52: Done.
+      __ andps(dst, kScratchDoubleReg);
+      __ xorps(dst, i.InputSimd128Register(2));
+      break;
+    }
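Select is the branchless bitwise-select trick: with dst holding the mask m, ((b ^ c) & m) ^ c picks b wherever an m bit is 1 ((b ^ c) ^ c = b) and c wherever it is 0 (0 ^ c = c). Computing b ^ c in the scratch register, as the review asked, leaves both inputs intact. Sketch (helper name is ours):

    #include <emmintrin.h>  // SSE2

    // Per bit: m ? b : c, without branches and without clobbering b or c.
    static __m128i select128(__m128i m, __m128i b, __m128i c) {
      __m128i t = _mm_xor_si128(b, c);  // b ^ c, kept in a temporary
      t = _mm_and_si128(t, m);          // (b ^ c) & m
      return _mm_xor_si128(t, c);       // ((b ^ c) & m) ^ c
    }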
+    case kX64Simd128Zero: {
       XMMRegister dst = i.OutputSimd128Register();
       __ xorps(dst, dst);
       break;
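xorps dst,dst is the canonical zeroing idiom: no constant load is needed, and CPUs recognize the register-with-itself XOR as dependency-breaking. Intrinsics equivalent (sketch, helper name is ours):

    #include <emmintrin.h>  // SSE2

    static __m128i zero_s128() { return _mm_setzero_si128(); }  // emits a self-xor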