Index: src/x64/assembler-x64.h
diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h
index b2154fbaf4a61a7198481747f82b6f1a50ceae84..d29d9e113d9f3b3116aa54bac49baa17e9c14831 100644
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -40,6 +40,7 @@
 #include <deque>
 
 #include "src/assembler.h"
+#include "src/x64/sse-instr.h"
 
 namespace v8 {
 namespace internal {
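
Note: src/x64/sse-instr.h itself is not part of this patch. Judging by the
macro names consumed below, it presumably defines X-macro instruction lists of
roughly this shape; the entries here are illustrative only, with encodings
taken from the Intel opcode map (PADDD is 66 0F FE, PABSD is 66 0F 38 1E,
PMULLD is 66 0F 38 40):

    // Hypothetical sketch of src/x64/sse-instr.h: each list applies V to
    // (mnemonic, mandatory prefix, opcode escape byte(s), opcode).
    #define SSE2_INSTRUCTION_LIST(V) \
      V(paddd, 66, 0F, FE)           \
      V(psubd, 66, 0F, FA)           \
      V(pcmpeqd, 66, 0F, 76)

    #define SSSE3_INSTRUCTION_LIST(V) \
      V(pabsd, 66, 0F, 38, 1E)

    #define SSE4_INSTRUCTION_LIST(V) \
      V(pmulld, 66, 0F, 38, 40)
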
@@ -1072,7 +1073,91 @@ class Assembler : public AssemblerBase {
   void movmskps(Register dst, XMMRegister src);
 
+  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+              SIMDPrefix pp, LeadingOpcode m, VexW w);
+  void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
+              SIMDPrefix pp, LeadingOpcode m, VexW w);
+
   // SSE2 instructions
+  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
+                  byte opcode);
+  void sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
+                  byte escape, byte opcode);
+#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
+  void instruction(XMMRegister dst, XMMRegister src) {                \
+    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
+  }                                                                   \
+  void instruction(XMMRegister dst, const Operand& src) {             \
+    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
+  }
+
+  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
+#undef DECLARE_SSE2_INSTRUCTION
+
+#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
+  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
+    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
+  }                                                                          \
+  void v##instruction(XMMRegister dst, XMMRegister src1,                     \
+                      const Operand& src2) {                                 \
+    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
+  }
+
+  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
+#undef DECLARE_SSE2_AVX_INSTRUCTION
+
+  // SSE3
+  void lddqu(XMMRegister dst, const Operand& src);
+
+  // SSSE3
+  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
+                   byte escape2, byte opcode);
+  void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
+                   byte escape1, byte escape2, byte opcode);
+
+#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
+                                  opcode)                                    \
+  void instruction(XMMRegister dst, XMMRegister src) {                       \
+    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+  }                                                                          \
+  void instruction(XMMRegister dst, const Operand& src) {                    \
+    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+  }
+
+  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
+#undef DECLARE_SSSE3_INSTRUCTION
+
+  // SSE4
+  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
+                  byte escape2, byte opcode);
+  void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
+                  byte escape1, byte escape2, byte opcode);
+#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
+                                 opcode)                                    \
+  void instruction(XMMRegister dst, XMMRegister src) {                      \
+    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+  }                                                                         \
+  void instruction(XMMRegister dst, const Operand& src) {                   \
+    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+  }
+
+  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
+#undef DECLARE_SSE4_INSTRUCTION
+
+#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
+                                      opcode)                                 \
+  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
+    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
+  }                                                                           \
+  void v##instruction(XMMRegister dst, XMMRegister src1,                      \
+                      const Operand& src2) {                                  \
+    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
+  }
+
+  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
+  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
+#undef DECLARE_SSE34_AVX_INSTRUCTION
+
   void movd(XMMRegister dst, Register src);
   void movd(XMMRegister dst, const Operand& src);
   void movd(Register dst, XMMRegister src);
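
Note: for a single list entry such as V(paddd, 66, 0F, FE) (the entry itself
is an assumption about sse-instr.h), DECLARE_SSE2_INSTRUCTION and
DECLARE_SSE2_AVX_INSTRUCTION expand mechanically via token pasting to:

    void paddd(XMMRegister dst, XMMRegister src) {
      sse2_instr(dst, src, 0x66, 0x0F, 0xFE);
    }
    void paddd(XMMRegister dst, const Operand& src) {
      sse2_instr(dst, src, 0x66, 0x0F, 0xFE);
    }
    void vpaddd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
      vinstr(0xFE, dst, src1, src2, k66, k0F, kW0);
    }
    void vpaddd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
      vinstr(0xFE, dst, src1, src2, k66, k0F, kW0);
    }

This is what lets the hand-written paddd/psubd/pmulld/pmuludq/cvtps2dq and
vpcmpeqd declarations be deleted further down in this patch.
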
@@ -1101,8 +1186,12 @@
   void psllq(XMMRegister reg, byte imm8);
   void psrlq(XMMRegister reg, byte imm8);
+  void psllw(XMMRegister reg, byte imm8);
   void pslld(XMMRegister reg, byte imm8);
+  void psrlw(XMMRegister reg, byte imm8);
   void psrld(XMMRegister reg, byte imm8);
+  void psraw(XMMRegister reg, byte imm8);
+  void psrad(XMMRegister reg, byte imm8);
 
   void cvttsd2si(Register dst, const Operand& src);
   void cvttsd2si(Register dst, XMMRegister src);
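
Note: the matching definitions live in assembler-x64.cc and are not part of
this hunk. These shift-by-immediate forms encode the operation as an opcode
extension in the ModRM reg field (PSRAW is 66 0F 71 /4 ib), so a plausible
sketch of one definition, assuming the emit_* helpers used elsewhere in this
assembler, is:

    void Assembler::psraw(XMMRegister reg, byte imm8) {
      EnsureSpace ensure_space(this);
      emit(0x66);
      emit_optional_rex_32(reg);
      emit(0x0F);
      emit(0x71);
      emit_sse_operand(rsp, reg);  // rsp.code() == 4 supplies the /4 digit
      emit(imm8);
    }
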
@@ -1155,7 +1244,6 @@
   void ucomisd(XMMRegister dst, XMMRegister src);
   void ucomisd(XMMRegister dst, const Operand& src);
   void cmpltsd(XMMRegister dst, XMMRegister src);
-  void pcmpeqd(XMMRegister dst, XMMRegister src);
 
   void movmskpd(Register dst, XMMRegister src);
@@ -1166,7 +1254,16 @@
   // SSE 4.1 instruction
   void insertps(XMMRegister dst, XMMRegister src, byte imm8);
   void extractps(Register dst, XMMRegister src, byte imm8);
+  void pextrb(Register dst, XMMRegister src, int8_t imm8);
+  void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
+  void pextrw(Register dst, XMMRegister src, int8_t imm8);
+  void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
   void pextrd(Register dst, XMMRegister src, int8_t imm8);
+  void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
+  void pinsrb(XMMRegister dst, Register src, int8_t imm8);
+  void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
+  void pinsrw(XMMRegister dst, Register src, int8_t imm8);
+  void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
   void pinsrd(XMMRegister dst, Register src, int8_t imm8);
   void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
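
Note: these widen lane access beyond the existing dword-only pextrd/pinsrd
pair to byte and word lanes. An illustrative call site in a code generator
(the surrounding setup is assumed, not from this patch):

    Assembler* masm = ...;                   // hypothetical
    masm->pinsrb(xmm0, rax, 0);              // xmm0.b[0] = al
    masm->pinsrw(xmm0, Operand(rbx, 0), 1);  // xmm0.w[1] = word at [rbx]
    masm->pextrd(rcx, xmm0, 3);              // ecx = xmm0.d[3]
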
@@ -1208,18 +1305,9 @@
   void movups(XMMRegister dst, XMMRegister src);
   void movups(XMMRegister dst, const Operand& src);
   void movups(const Operand& dst, XMMRegister src);
-  void paddd(XMMRegister dst, XMMRegister src);
-  void paddd(XMMRegister dst, const Operand& src);
-  void psubd(XMMRegister dst, XMMRegister src);
-  void psubd(XMMRegister dst, const Operand& src);
-  void pmulld(XMMRegister dst, XMMRegister src);
-  void pmulld(XMMRegister dst, const Operand& src);
-  void pmuludq(XMMRegister dst, XMMRegister src);
-  void pmuludq(XMMRegister dst, const Operand& src);
   void psrldq(XMMRegister dst, uint8_t shift);
   void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
-  void cvtps2dq(XMMRegister dst, XMMRegister src);
-  void cvtps2dq(XMMRegister dst, const Operand& src);
+  void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
   void cvtdq2ps(XMMRegister dst, XMMRegister src);
   void cvtdq2ps(XMMRegister dst, const Operand& src);
@@ -1421,7 +1509,6 @@
   AVX_P_3(vand, 0x54);
   AVX_P_3(vor, 0x56);
   AVX_P_3(vxor, 0x57);
-  AVX_3(vpcmpeqd, 0x76, vpd);
   AVX_3(vcvtsd2ss, 0x5a, vsd);
 
 #undef AVX_3
@@ -1440,102 +1527,98 @@
     emit(imm8);
   }
   void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
-    vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
+    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
   }
   void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
-    vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
+    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
   }
   void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
     XMMRegister isrc2 = {src2.code()};
-    vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
+    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
   }
   void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
-    vsd(0x2a, dst, src1, src2, kF2, k0F, kW0);
+    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
   }
   void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
     XMMRegister isrc2 = {src2.code()};
-    vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
+    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
   }
   void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
-    vsd(0x2a, dst, src1, src2, kF3, k0F, kW0);
+    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
   }
   void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
     XMMRegister isrc2 = {src2.code()};
-    vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
+    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
   }
   void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
-    vsd(0x2a, dst, src1, src2, kF3, k0F, kW1);
+    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
   }
   void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
     XMMRegister isrc2 = {src2.code()};
-    vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
+    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
   }
   void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
-    vsd(0x2a, dst, src1, src2, kF2, k0F, kW1);
+    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
   }
   void vcvttss2si(Register dst, XMMRegister src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
+    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
   }
   void vcvttss2si(Register dst, const Operand& src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
+    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
   }
   void vcvttsd2si(Register dst, XMMRegister src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
+    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
   }
   void vcvttsd2si(Register dst, const Operand& src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
+    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
   }
   void vcvttss2siq(Register dst, XMMRegister src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
+    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
   }
   void vcvttss2siq(Register dst, const Operand& src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
+    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
   }
   void vcvttsd2siq(Register dst, XMMRegister src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
+    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
   }
   void vcvttsd2siq(Register dst, const Operand& src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
+    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
   }
   void vcvtsd2si(Register dst, XMMRegister src) {
     XMMRegister idst = {dst.code()};
-    vsd(0x2d, idst, xmm0, src, kF2, k0F, kW0);
+    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
   }
   void vucomisd(XMMRegister dst, XMMRegister src) {
-    vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
+    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
   }
   void vucomisd(XMMRegister dst, const Operand& src) {
-    vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
+    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
   }
   void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 RoundingMode mode) {
-    vsd(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
+    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
     emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
   }
   void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 RoundingMode mode) {
-    vsd(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
+    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
     emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
   }
   void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
-    vsd(op, dst, src1, src2, kF2, k0F, kWIG);
+    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
   }
   void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
-    vsd(op, dst, src1, src2, kF2, k0F, kWIG);
+    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
   }
-  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
-           SIMDPrefix pp, LeadingOpcode m, VexW w);
-  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
-           SIMDPrefix pp, LeadingOpcode m, VexW w);
 
   void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
     vss(0x10, dst, src1, src2);
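
Note: the four-argument vsd overloads are renamed to vinstr (declared at the
top of this patch), since the helper now fronts for integer and packed
instructions as well as scalar-double ones; only the kF2/k0F/kWIG convenience
wrappers keep the vsd name. The definition lives in assembler-x64.cc and is
not shown here; presumably it keeps the old vsd body, along these lines:

    void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
                           XMMRegister src2, SIMDPrefix pp, LeadingOpcode m,
                           VexW w) {
      DCHECK(IsEnabled(AVX));
      EnsureSpace ensure_space(this);
      emit_vex_prefix(dst, src1, src2, kLIG, pp, m, w);
      emit(op);
      emit_sse_operand(dst, src2);
    }
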
@@ -1616,6 +1699,101 @@
 #undef AVX_CMP_P
 
+  void vlddqu(XMMRegister dst, const Operand& src) {
+    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
+  }
+  void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
+    XMMRegister iop = {6};
+    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
+  void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
+    XMMRegister iop = {2};
+    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
+  void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
+    XMMRegister iop = {4};
+    vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
+  void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
+    XMMRegister iop = {6};
+    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
+  void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
+    XMMRegister iop = {2};
+    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
+  void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
+    XMMRegister iop = {4};
+    vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
+  void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
+    XMMRegister idst = {dst.code()};
+    vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
+    vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
+    XMMRegister idst = {dst.code()};
+    vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
+    emit(imm8);
+  }
+  void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
+    vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
+    XMMRegister idst = {dst.code()};
+    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
+    vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
+    XMMRegister isrc = {src2.code()};
+    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
+               int8_t imm8) {
+    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
+    XMMRegister isrc = {src2.code()};
+    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
+    emit(imm8);
+  }
+  void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
+               int8_t imm8) {
+    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
+    emit(imm8);
+  }
+  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
+    XMMRegister isrc = {src2.code()};
+    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
+               int8_t imm8) {
+    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
+    emit(imm8);
+  }
+  void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
+    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
+    emit(imm8);
+  }
+
   void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
   void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
   void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
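
Note: in the VEX shift-by-immediate helpers above, the operation is again an
opcode extension carried in ModRM.reg (/6 shift-left, /2 logical shift-right,
/4 arithmetic shift-right), while the destination register travels in
VEX.vvvv; the XMMRegister iop = {N} locals smuggle the digit through vinstr's
register slot. An illustrative call site (names assumed; AVX support must be
verified before emitting any v-prefixed instruction):

    CpuFeatureScope avx_scope(masm, AVX);  // masm: Assembler*, hypothetical
    masm->vpsrld(xmm0, xmm1, 4);           // per-dword logical shift right
    masm->vpshufd(xmm2, xmm0, 0x1B);       // reverse the four dwords
    masm->vlddqu(xmm3, Operand(rbx, 0));   // unaligned 128-bit load
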