| Index: src/x64/assembler-x64.h
|
| diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h
|
| index b2154fbaf4a61a7198481747f82b6f1a50ceae84..d29d9e113d9f3b3116aa54bac49baa17e9c14831 100644
|
| --- a/src/x64/assembler-x64.h
|
| +++ b/src/x64/assembler-x64.h
|
| @@ -40,6 +40,7 @@
|
| #include <deque>
|
|
|
| #include "src/assembler.h"
|
| +#include "src/x64/sse-instr.h"
|
|
|
| namespace v8 {
|
| namespace internal {
|
| @@ -1072,7 +1073,91 @@ class Assembler : public AssemblerBase {
|
|
|
| void movmskps(Register dst, XMMRegister src);
|
|
|
| + void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
| + SIMDPrefix pp, LeadingOpcode m, VexW w);  // Register src2 overload; pp, m and w are the SIMD-prefix, leading-opcode and VEX.W selectors.
|
| + void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
|
| + SIMDPrefix pp, LeadingOpcode m, VexW w);  // Operand src2 overload; same selector parameters.
|
| +
|
| // SSE2 instructions
|
| + void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
|
| + byte opcode);  // Register-source overload; only declared here.
|
| + void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
|
| + byte opcode);  // Operand-source overload; only declared here.
|
| +#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
|
| + void instruction(XMMRegister dst, XMMRegister src) { \
|
| + sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
|
| + } \
|
| + void instruction(XMMRegister dst, const Operand& src) { \
|
| + sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
|
| + }  // 0x## pastes each macro argument into a hex literal, so arguments must be bare hex digits.
|
| +
|
| + SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)  // Each macro use yields a register-source and an Operand-source overload; the list itself is not defined in this view (presumably src/x64/sse-instr.h).
|
| +#undef DECLARE_SSE2_INSTRUCTION  // The helper macro is dropped once the list has been expanded.
|
| +
|
| +#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
|
| + void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
|
| + vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
|
| + } \
|
| + void v##instruction(XMMRegister dst, XMMRegister src1, \
|
| + const Operand& src2) { \
|
| + vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
|
| + }  // v## prefixes the generated name; k##prefix and k##escape paste into enumerator names (same form as k66 and k0F); kW0 is always passed.
|
| +
|
| + SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)  // Same list as the non-VEX section, expanded here into three-operand v-prefixed wrappers around vinstr.
|
| +#undef DECLARE_SSE2_AVX_INSTRUCTION  // The helper macro is dropped once the list has been expanded.
|
| +
|
| + // SSE3 instructions
|
| + void lddqu(XMMRegister dst, const Operand& src);  // Only an Operand-source overload is declared.
|
| +
|
| + // SSSE3 instructions
|
| + void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
|
| + byte escape2, byte opcode);  // Register-source overload; takes two escape bytes, only declared here.
|
| + void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
|
| + byte escape1, byte escape2, byte opcode);  // Operand-source overload; only declared here.
|
| +
|
| +#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
|
| + opcode) \
|
| + void instruction(XMMRegister dst, XMMRegister src) { \
|
| + ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
|
| + } \
|
| + void instruction(XMMRegister dst, const Operand& src) { \
|
| + ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
|
| + }  // 0x## pastes each macro argument into a hex literal, so arguments must be bare hex digits.
|
| +
|
| + SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)  // Each macro use yields a register-source and an Operand-source overload; the list is not defined in this view.
|
| +#undef DECLARE_SSSE3_INSTRUCTION  // The helper macro is dropped once the list has been expanded.
|
| +
|
| + // SSE4 instructions
|
| + void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
|
| + byte escape2, byte opcode);  // Register-source overload; takes two escape bytes, only declared here.
|
| + void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
|
| + byte escape1, byte escape2, byte opcode);  // Operand-source overload; only declared here.
|
| +#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
|
| + opcode) \
|
| + void instruction(XMMRegister dst, XMMRegister src) { \
|
| + sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
|
| + } \
|
| + void instruction(XMMRegister dst, const Operand& src) { \
|
| + sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
|
| + }  // 0x## pastes each macro argument into a hex literal, so arguments must be bare hex digits.
|
| +
|
| + SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)  // Each macro use yields a register-source and an Operand-source overload; the list is not defined in this view.
|
| +#undef DECLARE_SSE4_INSTRUCTION  // The helper macro is dropped once the list has been expanded.
|
| +
|
| +#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
|
| + opcode) \
|
| + void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
|
| + vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
|
| + } \
|
| + void v##instruction(XMMRegister dst, XMMRegister src1, \
|
| + const Operand& src2) { \
|
| + vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
|
| + }  // k##escape1##escape2 glues both escape arguments into one enumerator name (same form as k0F3A); kW0 is always passed.
|
| +
|
| + SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)  // One macro serves both lists because their entries share the five-argument shape.
|
| + SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)  // Generates the v-prefixed three-operand wrappers for the SSE4 list.
|
| +#undef DECLARE_SSE34_AVX_INSTRUCTION  // The helper macro is dropped once both lists have been expanded.
|
| +
|
| void movd(XMMRegister dst, Register src);
|
| void movd(XMMRegister dst, const Operand& src);
|
| void movd(Register dst, XMMRegister src);
|
| @@ -1101,8 +1186,12 @@ class Assembler : public AssemblerBase {
|
|
|
| void psllq(XMMRegister reg, byte imm8);
|
| void psrlq(XMMRegister reg, byte imm8);
|
| + void psllw(XMMRegister reg, byte imm8);
|
| void pslld(XMMRegister reg, byte imm8);
|
| + void psrlw(XMMRegister reg, byte imm8);
|
| void psrld(XMMRegister reg, byte imm8);
|
| + void psraw(XMMRegister reg, byte imm8);
|
| + void psrad(XMMRegister reg, byte imm8);
|
|
|
| void cvttsd2si(Register dst, const Operand& src);
|
| void cvttsd2si(Register dst, XMMRegister src);
|
| @@ -1155,7 +1244,6 @@ class Assembler : public AssemblerBase {
|
| void ucomisd(XMMRegister dst, XMMRegister src);
|
| void ucomisd(XMMRegister dst, const Operand& src);
|
| void cmpltsd(XMMRegister dst, XMMRegister src);
|
| - void pcmpeqd(XMMRegister dst, XMMRegister src);
|
|
|
| void movmskpd(Register dst, XMMRegister src);
|
|
|
| @@ -1166,7 +1254,16 @@ class Assembler : public AssemblerBase {
|
| // SSE 4.1 instruction
|
| void insertps(XMMRegister dst, XMMRegister src, byte imm8);
|
| void extractps(Register dst, XMMRegister src, byte imm8);
|
| + void pextrb(Register dst, XMMRegister src, int8_t imm8);
|
| + void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
|
| + void pextrw(Register dst, XMMRegister src, int8_t imm8);
|
| + void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
|
| void pextrd(Register dst, XMMRegister src, int8_t imm8);
|
| + void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
|
| + void pinsrb(XMMRegister dst, Register src, int8_t imm8);
|
| + void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
|
| + void pinsrw(XMMRegister dst, Register src, int8_t imm8);
|
| + void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
|
| void pinsrd(XMMRegister dst, Register src, int8_t imm8);
|
| void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
|
|
|
| @@ -1208,18 +1305,9 @@ class Assembler : public AssemblerBase {
|
| void movups(XMMRegister dst, XMMRegister src);
|
| void movups(XMMRegister dst, const Operand& src);
|
| void movups(const Operand& dst, XMMRegister src);
|
| - void paddd(XMMRegister dst, XMMRegister src);
|
| - void paddd(XMMRegister dst, const Operand& src);
|
| - void psubd(XMMRegister dst, XMMRegister src);
|
| - void psubd(XMMRegister dst, const Operand& src);
|
| - void pmulld(XMMRegister dst, XMMRegister src);
|
| - void pmulld(XMMRegister dst, const Operand& src);
|
| - void pmuludq(XMMRegister dst, XMMRegister src);
|
| - void pmuludq(XMMRegister dst, const Operand& src);
|
| void psrldq(XMMRegister dst, uint8_t shift);
|
| void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
|
| - void cvtps2dq(XMMRegister dst, XMMRegister src);
|
| - void cvtps2dq(XMMRegister dst, const Operand& src);
|
| + void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
|
| void cvtdq2ps(XMMRegister dst, XMMRegister src);
|
| void cvtdq2ps(XMMRegister dst, const Operand& src);
|
|
|
| @@ -1421,7 +1509,6 @@ class Assembler : public AssemblerBase {
|
| AVX_P_3(vand, 0x54);
|
| AVX_P_3(vor, 0x56);
|
| AVX_P_3(vxor, 0x57);
|
| - AVX_3(vpcmpeqd, 0x76, vpd);
|
| AVX_3(vcvtsd2ss, 0x5a, vsd);
|
|
|
| #undef AVX_3
|
| @@ -1440,102 +1527,98 @@ class Assembler : public AssemblerBase {
|
| emit(imm8);
|
| }
|
| void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
| - vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
|
| + vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);  // Now calls vinstr directly: opcode 0x5a with the kF3 prefix, k0F map and kWIG.
|
| }
|
| void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
| - vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
|
| + vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);  // Operand form: same encoding arguments as the register form.
|
| }
|
| void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
|
| XMMRegister isrc2 = {src2.code()};
|
| - vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
|
| + vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);  // isrc2 only carries src2's register code into vinstr's XMMRegister parameter.
|
| }
|
| void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
| - vsd(0x2a, dst, src1, src2, kF2, k0F, kW0);
|
| + vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);  // Operand form: same opcode, prefix, map and W arguments.
|
| }
|
| void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
|
| XMMRegister isrc2 = {src2.code()};
|
| - vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
|
| + vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);  // Same arguments as vcvtlsi2sd except the prefix is kF3 instead of kF2.
|
| }
|
| void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
| - vsd(0x2a, dst, src1, src2, kF3, k0F, kW0);
|
| + vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);  // Operand form: same opcode, prefix, map and W arguments.
|
| }
|
| void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
|
| XMMRegister isrc2 = {src2.code()};
|
| - vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
|
| + vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);  // Differs from vcvtlsi2ss only in passing kW1 instead of kW0.
|
| }
|
| void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
| - vsd(0x2a, dst, src1, src2, kF3, k0F, kW1);
|
| + vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);  // Operand form: same opcode, prefix, map and W arguments.
|
| }
|
| void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
|
| XMMRegister isrc2 = {src2.code()};
|
| - vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
|
| + vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);  // Differs from vcvtlsi2sd only in passing kW1 instead of kW0.
|
| }
|
| void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
| - vsd(0x2a, dst, src1, src2, kF2, k0F, kW1);
|
| + vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);  // Operand form: same opcode, prefix, map and W arguments.
|
| }
|
| void vcvttss2si(Register dst, XMMRegister src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
|
| + vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);  // idst carries dst's register code; xmm0 is passed for src1 because this method has no first source.
|
| }
|
| void vcvttss2si(Register dst, const Operand& src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
|
| + vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);  // Operand form: same arguments apart from src.
|
| }
|
| void vcvttsd2si(Register dst, XMMRegister src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
|
| + vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);  // Same arguments as vcvttss2si except the prefix is kF2 instead of kF3.
|
| }
|
| void vcvttsd2si(Register dst, const Operand& src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
|
| + vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);  // Operand form: same arguments apart from src.
|
| }
|
| void vcvttss2siq(Register dst, XMMRegister src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
|
| + vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);  // Differs from vcvttss2si only in passing kW1 instead of kW0.
|
| }
|
| void vcvttss2siq(Register dst, const Operand& src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
|
| + vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);  // Operand form: same arguments apart from src.
|
| }
|
| void vcvttsd2siq(Register dst, XMMRegister src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
|
| + vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);  // Differs from vcvttsd2si only in passing kW1 instead of kW0.
|
| }
|
| void vcvttsd2siq(Register dst, const Operand& src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
|
| + vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);  // Operand form: same arguments apart from src.
|
| }
|
| void vcvtsd2si(Register dst, XMMRegister src) {
|
| XMMRegister idst = {dst.code()};
|
| - vsd(0x2d, idst, xmm0, src, kF2, k0F, kW0);
|
| + vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);  // Opcode 0x2d where vcvttsd2si uses 0x2c; other arguments match. No Operand overload is defined here.
|
| }
|
| void vucomisd(XMMRegister dst, XMMRegister src) {
|
| - vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
|
| + vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);  // Two-operand method: xmm0 is passed for vinstr's src1 parameter.
|
| }
|
| void vucomisd(XMMRegister dst, const Operand& src) {
|
| - vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
|
| + vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);  // Operand form: same arguments apart from src.
|
| }
|
| void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
| RoundingMode mode) {
|
| - vsd(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
|
| + vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);  // Uses the k0F3A map; the rounding-mode byte is emitted by the next statement.
|
| emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
|
| }
|
| void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
| RoundingMode mode) {
|
| - vsd(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
|
| + vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);  // Opcode 0x0b where vroundss uses 0x0a; the rounding-mode byte is emitted by the next statement.
|
| emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
|
| }
|
|
|
| void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
| - vsd(op, dst, src1, src2, kF2, k0F, kWIG);
|
| + vinstr(op, dst, src1, src2, kF2, k0F, kWIG);  // Four-argument vsd is kept as a shorthand that fixes kF2, k0F and kWIG.
|
| }
|
| void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
|
| - vsd(op, dst, src1, src2, kF2, k0F, kWIG);
|
| + vinstr(op, dst, src1, src2, kF2, k0F, kWIG);  // Operand form of the same shorthand.
|
| }
|
| - void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
|
| - SIMDPrefix pp, LeadingOpcode m, VexW w);
|
| - void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
|
| - SIMDPrefix pp, LeadingOpcode m, VexW w);
|
|
|
| void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
|
| vss(0x10, dst, src1, src2);
|
| @@ -1616,6 +1699,101 @@ class Assembler : public AssemblerBase {
|
|
|
| #undef AVX_CMP_P
|
|
|
| + void vlddqu(XMMRegister dst, const Operand& src) {
|
| + vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);  // Opcode 0xF0 with kF2 and k0F; xmm0 is passed for src1 because this method has no first source.
|
| + }
|
| + void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister iop = {6};  // Not an operand: the constant 6 rides in vinstr's dst slot (Intel SDM lists PSLLW imm8 as 66 0F 71 /6 ib).
|
| + vinstr(0x71, iop, dst, src, k66, k0F, kWIG);  // The real destination is passed as src1 and the source as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister iop = {2};  // Not an operand: the constant 2 rides in vinstr's dst slot (Intel SDM lists PSRLW imm8 as 66 0F 71 /2 ib).
|
| + vinstr(0x71, iop, dst, src, k66, k0F, kWIG);  // The real destination is passed as src1 and the source as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister iop = {4};  // Not an operand: the constant 4 rides in vinstr's dst slot (Intel SDM lists PSRAW imm8 as 66 0F 71 /4 ib).
|
| + vinstr(0x71, iop, dst, src, k66, k0F, kWIG);  // The real destination is passed as src1 and the source as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister iop = {6};  // Not an operand: the constant 6 rides in vinstr's dst slot (Intel SDM lists PSLLD imm8 as 66 0F 72 /6 ib).
|
| + vinstr(0x72, iop, dst, src, k66, k0F, kWIG);  // The real destination is passed as src1 and the source as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister iop = {2};  // Not an operand: the constant 2 rides in vinstr's dst slot (Intel SDM lists PSRLD imm8 as 66 0F 72 /2 ib).
|
| + vinstr(0x72, iop, dst, src, k66, k0F, kWIG);  // The real destination is passed as src1 and the source as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister iop = {4};  // Not an operand: the constant 4 rides in vinstr's dst slot (Intel SDM lists PSRAD imm8 as 66 0F 72 /4 ib).
|
| + vinstr(0x72, iop, dst, src, k66, k0F, kWIG);  // The real destination is passed as src1 and the source as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister idst = {dst.code()};  // Carries dst's register code into vinstr's XMMRegister parameter.
|
| + vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);  // Roles are swapped: src goes in vinstr's dst slot and the destination in src2; xmm0 is passed for src1.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
|
| + vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);  // Same argument layout as the register form, with the Operand as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister idst = {dst.code()};  // Carries dst's register code into vinstr's XMMRegister parameter.
|
| + vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);  // Register form uses opcode 0xc5 in the k0F map with the destination in vinstr's dst slot, unlike the Operand form below.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
|
| + vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);  // Operand form uses opcode 0x15 in the k0F3A map, with src in vinstr's dst slot and the Operand as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
|
| + XMMRegister idst = {dst.code()};  // Carries dst's register code into vinstr's XMMRegister parameter.
|
| + vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);  // Roles are swapped: src goes in vinstr's dst slot and the destination in src2; xmm0 is passed for src1.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
|
| + vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);  // Same argument layout as the register form, with the Operand as src2.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
|
| + XMMRegister isrc = {src2.code()};  // Carries src2's register code into vinstr's XMMRegister parameter.
|
| + vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);  // Opcode 0x20 in the k0F3A map; operands are passed in their natural order.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
|
| + int8_t imm8) {
|
| + vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);  // Operand form: same encoding arguments as the register form.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
|
| + XMMRegister isrc = {src2.code()};  // Carries src2's register code into vinstr's XMMRegister parameter.
|
| + vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);  // Opcode 0xc4 in the k0F map, where vpinsrb and vpinsrd use k0F3A.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
|
| + int8_t imm8) {
|
| + vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);  // Operand form: same encoding arguments as the register form.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
|
| + XMMRegister isrc = {src2.code()};  // Carries src2's register code into vinstr's XMMRegister parameter.
|
| + vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);  // Opcode 0x22 in the k0F3A map; operands are passed in their natural order.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
|
| + int8_t imm8) {
|
| + vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);  // Operand form: same encoding arguments as the register form.
|
| + emit(imm8);  // The immediate byte is emitted right after the vinstr call.
|
| + }
|
| + void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {  // imm8 is uint8_t, matching pshufd's shuffle parameter, so masks of 0x80 and above (e.g. 0xFF) pass without narrowing.
|
| + vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);  // Opcode 0x70 with k66 and k0F; xmm0 is passed for src1 because this method has no first source.
|
| + emit(imm8);  // The shuffle-control byte is emitted right after the vinstr call.
|
| + }
|
| +
|
| void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
| void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
|
| void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
|
|
|