Chromium Code Reviews

Unified Diff: src/x64/assembler-x64.h

Issue 2328843003: [x64] supplement SSE instructions for SIMD.js (Closed)
Patch Set: Created 4 years, 3 months ago
Index: src/x64/assembler-x64.h
diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h
index b2154fbaf4a61a7198481747f82b6f1a50ceae84..d29d9e113d9f3b3116aa54bac49baa17e9c14831 100644
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -40,6 +40,7 @@
#include <deque>
#include "src/assembler.h"
+#include "src/x64/sse-instr.h"
namespace v8 {
namespace internal {
@@ -1072,7 +1073,91 @@ class Assembler : public AssemblerBase {
void movmskps(Register dst, XMMRegister src);
+ void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ SIMDPrefix pp, LeadingOpcode m, VexW w);
+ void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
+ SIMDPrefix pp, LeadingOpcode m, VexW w);
+
// SSE2 instructions
+ void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
+ byte opcode);
+ void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
+ byte opcode);
+#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
+ void instruction(XMMRegister dst, XMMRegister src) { \
+ sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
+ } \
+ void instruction(XMMRegister dst, const Operand& src) { \
+ sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
+ }
+
+ SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
+#undef DECLARE_SSE2_INSTRUCTION
+
+#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
+ void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
+ vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
+ } \
+ void v##instruction(XMMRegister dst, XMMRegister src1, \
+ const Operand& src2) { \
+ vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
+ }
+
+ SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
+#undef DECLARE_SSE2_AVX_INSTRUCTION
+
+ // SSE3
+ void lddqu(XMMRegister dst, const Operand& src);
+
+ // SSSE3
+ void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
+ byte escape2, byte opcode);
+ void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
+ byte escape1, byte escape2, byte opcode);
+
+#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
+ opcode) \
+ void instruction(XMMRegister dst, XMMRegister src) { \
+ ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+ } \
+ void instruction(XMMRegister dst, const Operand& src) { \
+ ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+ }
+
+ SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
+#undef DECLARE_SSSE3_INSTRUCTION
+
+ // SSE4
+ void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
+ byte escape2, byte opcode);
+ void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
+ byte escape1, byte escape2, byte opcode);
+#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
+ opcode) \
+ void instruction(XMMRegister dst, XMMRegister src) { \
+ sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+ } \
+ void instruction(XMMRegister dst, const Operand& src) { \
+ sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
+ }
+
+ SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
+#undef DECLARE_SSE4_INSTRUCTION
+
+#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
+ opcode) \
+ void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
+ vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
+ } \
+ void v##instruction(XMMRegister dst, XMMRegister src1, \
+ const Operand& src2) { \
+ vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
+ }
+
+ SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
+ SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
+#undef DECLARE_SSE34_AVX_INSTRUCTION
+
void movd(XMMRegister dst, Register src);
void movd(XMMRegister dst, const Operand& src);
void movd(Register dst, XMMRegister src);
@@ -1101,8 +1186,12 @@ class Assembler : public AssemblerBase {
void psllq(XMMRegister reg, byte imm8);
void psrlq(XMMRegister reg, byte imm8);
+ void psllw(XMMRegister reg, byte imm8);
void pslld(XMMRegister reg, byte imm8);
+ void psrlw(XMMRegister reg, byte imm8);
void psrld(XMMRegister reg, byte imm8);
+ void psraw(XMMRegister reg, byte imm8);
+ void psrad(XMMRegister reg, byte imm8);
void cvttsd2si(Register dst, const Operand& src);
void cvttsd2si(Register dst, XMMRegister src);
@@ -1155,7 +1244,6 @@ class Assembler : public AssemblerBase {
void ucomisd(XMMRegister dst, XMMRegister src);
void ucomisd(XMMRegister dst, const Operand& src);
void cmpltsd(XMMRegister dst, XMMRegister src);
- void pcmpeqd(XMMRegister dst, XMMRegister src);
void movmskpd(Register dst, XMMRegister src);
@@ -1166,7 +1254,16 @@ class Assembler : public AssemblerBase {
// SSE 4.1 instruction
void insertps(XMMRegister dst, XMMRegister src, byte imm8);
void extractps(Register dst, XMMRegister src, byte imm8);
+ void pextrb(Register dst, XMMRegister src, int8_t imm8);
+ void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
+ void pextrw(Register dst, XMMRegister src, int8_t imm8);
+ void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
void pextrd(Register dst, XMMRegister src, int8_t imm8);
+ void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
+ void pinsrb(XMMRegister dst, Register src, int8_t imm8);
+ void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
+ void pinsrw(XMMRegister dst, Register src, int8_t imm8);
+ void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
void pinsrd(XMMRegister dst, Register src, int8_t imm8);
void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
@@ -1208,18 +1305,9 @@ class Assembler : public AssemblerBase {
void movups(XMMRegister dst, XMMRegister src);
void movups(XMMRegister dst, const Operand& src);
void movups(const Operand& dst, XMMRegister src);
- void paddd(XMMRegister dst, XMMRegister src);
- void paddd(XMMRegister dst, const Operand& src);
- void psubd(XMMRegister dst, XMMRegister src);
- void psubd(XMMRegister dst, const Operand& src);
- void pmulld(XMMRegister dst, XMMRegister src);
- void pmulld(XMMRegister dst, const Operand& src);
- void pmuludq(XMMRegister dst, XMMRegister src);
- void pmuludq(XMMRegister dst, const Operand& src);
void psrldq(XMMRegister dst, uint8_t shift);
void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
- void cvtps2dq(XMMRegister dst, XMMRegister src);
- void cvtps2dq(XMMRegister dst, const Operand& src);
+ void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
void cvtdq2ps(XMMRegister dst, XMMRegister src);
void cvtdq2ps(XMMRegister dst, const Operand& src);
@@ -1421,7 +1509,6 @@ class Assembler : public AssemblerBase {
AVX_P_3(vand, 0x54);
AVX_P_3(vor, 0x56);
AVX_P_3(vxor, 0x57);
- AVX_3(vpcmpeqd, 0x76, vpd);
AVX_3(vcvtsd2ss, 0x5a, vsd);
#undef AVX_3
@@ -1440,102 +1527,98 @@ class Assembler : public AssemblerBase {
emit(imm8);
}
void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
- vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
+ vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
}
void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
- vsd(0x5a, dst, src1, src2, kF3, k0F, kWIG);
+ vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
}
void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
XMMRegister isrc2 = {src2.code()};
- vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
+ vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
}
void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
- vsd(0x2a, dst, src1, src2, kF2, k0F, kW0);
+ vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
}
void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
XMMRegister isrc2 = {src2.code()};
- vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
+ vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
}
void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
- vsd(0x2a, dst, src1, src2, kF3, k0F, kW0);
+ vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
}
void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
XMMRegister isrc2 = {src2.code()};
- vsd(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
+ vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
}
void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
- vsd(0x2a, dst, src1, src2, kF3, k0F, kW1);
+ vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
}
void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
XMMRegister isrc2 = {src2.code()};
- vsd(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
+ vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
}
void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
- vsd(0x2a, dst, src1, src2, kF2, k0F, kW1);
+ vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
}
void vcvttss2si(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
+ vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
}
void vcvttss2si(Register dst, const Operand& src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF3, k0F, kW0);
+ vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
}
void vcvttsd2si(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
+ vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
}
void vcvttsd2si(Register dst, const Operand& src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF2, k0F, kW0);
+ vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
}
void vcvttss2siq(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
+ vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
}
void vcvttss2siq(Register dst, const Operand& src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF3, k0F, kW1);
+ vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
}
void vcvttsd2siq(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
+ vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
}
void vcvttsd2siq(Register dst, const Operand& src) {
XMMRegister idst = {dst.code()};
- vsd(0x2c, idst, xmm0, src, kF2, k0F, kW1);
+ vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
}
void vcvtsd2si(Register dst, XMMRegister src) {
XMMRegister idst = {dst.code()};
- vsd(0x2d, idst, xmm0, src, kF2, k0F, kW0);
+ vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
}
void vucomisd(XMMRegister dst, XMMRegister src) {
- vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
+ vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
}
void vucomisd(XMMRegister dst, const Operand& src) {
- vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
+ vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
}
void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
- vsd(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
+ vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
- vsd(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
+ vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
- vsd(op, dst, src1, src2, kF2, k0F, kWIG);
+ vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
}
void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
- vsd(op, dst, src1, src2, kF2, k0F, kWIG);
+ vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
}
- void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
- SIMDPrefix pp, LeadingOpcode m, VexW w);
- void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
- SIMDPrefix pp, LeadingOpcode m, VexW w);
void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vss(0x10, dst, src1, src2);
@@ -1616,6 +1699,101 @@ class Assembler : public AssemblerBase {
#undef AVX_CMP_P
+ void vlddqu(XMMRegister dst, const Operand& src) {
+ vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
+ }
+ void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
+ XMMRegister iop = {6};
+ vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
+ emit(imm8);
+ }
+ void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
+ XMMRegister iop = {2};
+ vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
+ emit(imm8);
+ }
+ void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
+ XMMRegister iop = {4};
+ vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
+ emit(imm8);
+ }
+ void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
+ XMMRegister iop = {6};
+ vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
+ emit(imm8);
+ }
+ void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
+ XMMRegister iop = {2};
+ vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
+ emit(imm8);
+ }
+ void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
+ XMMRegister iop = {4};
+ vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
+ emit(imm8);
+ }
+ void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
+ XMMRegister idst = {dst.code()};
+ vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
+ vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
+ XMMRegister idst = {dst.code()};
+ vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
+ emit(imm8);
+ }
+ void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
+ vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
+ XMMRegister idst = {dst.code()};
+ vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
+ vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
+ XMMRegister isrc = {src2.code()};
+ vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
+ int8_t imm8) {
+ vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
+ XMMRegister isrc = {src2.code()};
+ vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
+ emit(imm8);
+ }
+ void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
+ int8_t imm8) {
+ vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
+ emit(imm8);
+ }
+ void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
+ XMMRegister isrc = {src2.code()};
+ vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
+ int8_t imm8) {
+ vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
+ emit(imm8);
+ }
+ void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
+ vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
+ emit(imm8);
+ }
+
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
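
Note on the declaration pattern above: the DECLARE_* macros are applied to X-macro lists from the newly included src/x64/sse-instr.h, so each list entry produces a register/register and a register/memory overload, and the same lists are reused to declare the VEX-encoded v-prefixed forms. The vpsllw/vpsrlw/vpsraw and vpslld/vpsrld/vpsrad helpers pass {6}, {2} and {4} as the first operand because those shift groups (opcodes 0x71 and 0x72) encode the operation as a /6, /2 or /4 opcode extension in the ModR/M reg field. The sketch below illustrates the macro expansion under the assumption that sse-instr.h defines entries of the form V(name, prefix, escape, opcode), e.g. V(paddd, 66, 0F, FE); AssemblerSketch and its printed trace are illustrative stand-ins for the real emitter, not the actual assembler.

// Minimal, self-contained sketch of the X-macro declaration pattern used in the patch.
#include <cstdint>
#include <cstdio>

using byte = uint8_t;
struct XMMRegister { int code; };

struct AssemblerSketch {
  // Stand-in for Assembler::sse2_instr(); the real one emits the prefix, escape
  // and opcode bytes followed by the ModR/M encoding of dst and src.
  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
                  byte opcode) {
    std::printf("emit %02X %02X %02X /r (xmm%d, xmm%d)\n", prefix, escape,
                opcode, dst.code, src.code);
  }

// Same macro shape as DECLARE_SSE2_INSTRUCTION above: each list entry becomes a
// thin wrapper that forwards the encoded bytes to the emitter.
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

// Hypothetical one-entry list standing in for SSE2_INSTRUCTION_LIST.
#define SSE2_INSTRUCTION_LIST_SKETCH(V) V(paddd, 66, 0F, FE)

  SSE2_INSTRUCTION_LIST_SKETCH(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION
#undef SSE2_INSTRUCTION_LIST_SKETCH
};

int main() {
  AssemblerSketch masm;
  XMMRegister xmm1{1}, xmm2{2};
  masm.paddd(xmm1, xmm2);  // prints: emit 66 0F FE /r (xmm1, xmm2)
  return 0;
}

Under that assumption, adding a new packed operation is a one-line change to the list: it picks up both the legacy-encoded instruction and, via DECLARE_SSE2_AVX_INSTRUCTION over the same list, the three-operand VEX form, which is why the hand-written paddd/psubd/pmulld/pmuludq/cvtps2dq declarations and vpcmpeqd could be deleted from this header.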