Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(146)

Unified Diff: unittest/IceAssemblerX8664Test.cpp

Issue 1224173006: Adds the x86-64 assembler. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments; changes emitRex.* logic. Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« src/IceRegistersX8664.h ('K') | « unittest/IceAssemblerX8632Test.cpp ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: unittest/IceAssemblerX8664Test.cpp
diff --git a/unittest/IceAssemblerX8664Test.cpp b/unittest/IceAssemblerX8664Test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9528cb3b78952bf668434e19724f43a72c36c547
--- /dev/null
+++ b/unittest/IceAssemblerX8664Test.cpp
@@ -0,0 +1,8038 @@
+//===- subzero/unittest/IceAssemblerX8664Test.cpp - X8664 Assembler tests -===//
+//
+// The Subzero Code Generator
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IceAssemblerX8664.h"
+
+#include "IceDefs.h"
+
+#include "gtest/gtest.h"
+
+#include <algorithm>
+#include <cstring>
+#include <errno.h>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <sys/mman.h>
+#include <type_traits>
+
+namespace Ice {
+namespace X8664 {
+namespace {
+// Shared gtest fixture for the x86-64 assembler tests. It owns the
+// AssemblerX8664 instance under test, provides short accessors for the register
+// encodings, and gives read-only access to the bytes emitted so far.
+class AssemblerX8664TestBase : public ::testing::Test {
+protected:
+  using Traits = AssemblerX8664::Traits;
+  using Address = Traits::Address;
+  using ByteRegister = Traits::ByteRegister;
+  using Cond = Traits::Cond;
+  using GPRRegister = Traits::GPRRegister;
+  using XmmRegister = Traits::XmmRegister;
+
+// Register accessors. Each macro below expands to a family of static constexpr
+// member functions named Encoded_GPR_<name>() (or Encoded_Xmm_<name>()), so
+// that a test can write, e.g.,
+//
+//   Encoded_GPR_al()
+//
+// where it would otherwise spell out GPRRegister::Encoded_Reg_eax for an 8 bit
+// operand. Every name generated for a given register returns the same encoding.
+//
+// LegacyRegAliases handles the registers inherited from x86-32. It takes the
+// 64-, 32-, 16- and 8-bit names of a register plus a "regular" rN alias (e.g.,
+// r1 is eax and r0 is esp), and also generates the q/d/w/l-suffixed spellings
+// of that alias.
+#define LegacyRegAliases(Alias, Reg64, Reg32, Reg16, Reg8)                     \
+  static constexpr GPRRegister Encoded_GPR_##Reg64() {                         \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Reg32() {                         \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Reg16() {                         \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Reg8() {                          \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Alias() {                         \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Alias##q() {                      \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Alias##d() {                      \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Alias##w() {                      \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Alias##l() {                      \
+    return GPRRegister::Encoded_Reg_##Reg32;                                   \
+  }
+// NewRegAliases handles r8-r15: the bare name and its q/d/w/l-suffixed
+// spellings all map to the register's Encoded_Reg_<name>d enumerator.
+#define NewRegAliases(Reg)                                                     \
+  static constexpr GPRRegister Encoded_GPR_##Reg() {                           \
+    return GPRRegister::Encoded_Reg_##Reg##d;                                  \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Reg##l() {                        \
+    return GPRRegister::Encoded_Reg_##Reg##d;                                  \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Reg##w() {                        \
+    return GPRRegister::Encoded_Reg_##Reg##d;                                  \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Reg##d() {                        \
+    return GPRRegister::Encoded_Reg_##Reg##d;                                  \
+  }                                                                            \
+  static constexpr GPRRegister Encoded_GPR_##Reg##q() {                        \
+    return GPRRegister::Encoded_Reg_##Reg##d;                                  \
+  }
+// XmmRegAliases generates the single Encoded_Xmm_<name>() accessor.
+#define XmmRegAliases(Reg)                                                     \
+  static constexpr XmmRegister Encoded_Xmm_##Reg() {                           \
+    return XmmRegister::Encoded_Reg_##Reg;                                     \
+  }
+  LegacyRegAliases(r0, rsp, esp, sp, spl);
+  LegacyRegAliases(r1, rax, eax, ax, al);
+  LegacyRegAliases(r2, rbx, ebx, bx, bl);
+  LegacyRegAliases(r3, rcx, ecx, cx, cl);
+  LegacyRegAliases(r4, rdx, edx, dx, dl);
+  LegacyRegAliases(r5, rbp, ebp, bp, bpl);
+  LegacyRegAliases(r6, rsi, esi, si, sil);
+  LegacyRegAliases(r7, rdi, edi, di, dil);
+  NewRegAliases(r8);
+  NewRegAliases(r9);
+  NewRegAliases(r10);
+  NewRegAliases(r11);
+  NewRegAliases(r12);
+  NewRegAliases(r13);
+  NewRegAliases(r14);
+  NewRegAliases(r15);
+  XmmRegAliases(xmm0);
+  XmmRegAliases(xmm1);
+  XmmRegAliases(xmm2);
+  XmmRegAliases(xmm3);
+  XmmRegAliases(xmm4);
+  XmmRegAliases(xmm5);
+  XmmRegAliases(xmm6);
+  XmmRegAliases(xmm7);
+  XmmRegAliases(xmm8);
+  XmmRegAliases(xmm9);
+  XmmRegAliases(xmm10);
+  XmmRegAliases(xmm11);
+  XmmRegAliases(xmm12);
+  XmmRegAliases(xmm13);
+  XmmRegAliases(xmm14);
+  XmmRegAliases(xmm15);
+// The alias macros are only meaningful inside this class; drop them here.
+#undef XmmRegAliases
+#undef NewRegAliases
+#undef LegacyRegAliases
+
+  // Every fixture instance starts out with a freshly constructed assembler.
+  AssemblerX8664TestBase() { reset(); }
+
+  // Replaces the current assembler with a brand new one, so whatever was
+  // emitted through the old instance is no longer visible via this fixture.
+  void reset() {
+    Assembler.reset(new AssemblerX8664());
+  }
+
+  // Returns the assembler under test. The fixture keeps ownership.
+  AssemblerX8664 *assembler() const {
+    return Assembler.get();
+  }
+
+  // Size of the assembler's current buffer view, i.e., the number of code
+  // bytes available through codeBytes().
+  size_t codeBytesSize() const {
+    return Assembler->getBufferView().size();
+  }
+
+  // Start of the assembler's current buffer view, exposed as raw bytes.
+  const uint8_t *codeBytes() const {
+    const void *const Start =
+        static_cast<const void *>(Assembler->getBufferView().data());
+    return static_cast<const uint8_t *>(Start);
+  }
+
+private:
+  std::unique_ptr<AssemblerX8664> Assembler;
+};
+
+// __ is a helper macro. It allows test cases to emit X8664 assembly
+// instructions with
+//
+// __ mov(IceType_i32, Encoded_GPR_eax(), Immediate(1));
+// __ ret();
+//
+// and so on. The idea of having this was "stolen" from Dart's unit tests.
+#define __ (this->assembler())->
+
+// AssemblerX8664LowLevelTest verifies that the "basic" instructions the tests
+// rely on are encoded correctly. Therefore, instead of executing the assembled
+// code, these tests will verify that the assembled bytes are sane.
+// Fixture for tests that inspect the emitted bytes directly rather than
+// running the generated code.
+class AssemblerX8664LowLevelTest : public AssemblerX8664TestBase {
+protected:
+  // verifyBytes<N>(Buffer, B0, B1, ...) checks Buffer[0..N) against the listed
+  // bytes, one byte per recursion step. I is the index being checked at the
+  // current step and starts at 0. A mismatch is reported through EXPECT_EQ
+  // (tagged "Byte <I + 1> of <N>") and also makes the overall result false.
+  //
+  // This overload ends the recursion once all expected bytes were consumed. The
+  // static_assert rejects calls that list fewer than N bytes.
+  template <int N, int I> static bool verifyBytes(const uint8_t *) {
+    static_assert(I == N, "Invalid template instantiation.");
+    return true;
+  }
+
+  // Recursive step: checks Buffer[I] against Byte, then hands the remaining
+  // expected bytes to the next step. The static_assert rejects calls that list
+  // more than N bytes.
+  template <int N, int I = 0, typename... Args>
+  static bool verifyBytes(const uint8_t *Buffer, uint8_t Byte,
+                          Args... OtherBytes) {
+    static_assert(I < N, "Invalid template instantiation.");
+    EXPECT_EQ(Byte, Buffer[I]) << "Byte " << (I + 1) << " of " << N;
+    // The tail is always verified, so every mismatching byte gets reported and
+    // not just the first one.
+    const bool TailMatches = verifyBytes<N, I + 1>(Buffer, OtherBytes...);
+    return TailMatches && Buffer[I] == Byte;
+  }
+};
+
+// A plain ret() must assemble to the single byte 0xC3.
+TEST_F(AssemblerX8664LowLevelTest, Ret) {
+  __ ret();
+
+  constexpr size_t ExpectedSize = 1;
+  ASSERT_EQ(ExpectedSize, codeBytesSize());
+
+  verifyBytes<ExpectedSize>(codeBytes(), 0xc3);
+}
+
+// ret(Immediate(0x20)) must assemble to 0xC2 followed by the immediate as two
+// bytes, least significant byte first.
+TEST_F(AssemblerX8664LowLevelTest, RetImm) {
+  __ ret(Immediate(0x20));
+
+  constexpr size_t ExpectedSize = 3;
+  ASSERT_EQ(ExpectedSize, codeBytesSize());
+
+  verifyBytes<ExpectedSize>(codeBytes(), 0xC2, 0x20, 0x00);
+}
+
+// call(Immediate(4)) must assemble to the 0xE8 opcode followed by four bytes.
+// Those four bytes are expected to be zero even though the immediate is 4.
+// NOTE(review): presumably the target is filled in later through a fixup --
+// confirm against the assembler's call() implementation.
+TEST_F(AssemblerX8664LowLevelTest, CallImm4) {
+  __ call(Immediate(4));
+
+  constexpr size_t ExpectedSize = 5;
+  ASSERT_EQ(ExpectedSize, codeBytesSize());
+
+  verifyBytes<ExpectedSize>(codeBytes(), 0xe8, 0x00, 0x00, 0x00, 0x00);
+}
+
+// Checks the encoding of popl for seven legacy registers (esp is not part of
+// the list) followed by r8-r15.
+TEST_F(AssemblerX8664LowLevelTest, PopRegs) {
+  __ popl(Encoded_GPR_eax());
+  __ popl(Encoded_GPR_ebx());
+  __ popl(Encoded_GPR_ecx());
+  __ popl(Encoded_GPR_edx());
+  __ popl(Encoded_GPR_edi());
+  __ popl(Encoded_GPR_esi());
+  __ popl(Encoded_GPR_ebp());
+  __ popl(Encoded_GPR_r8());
+  __ popl(Encoded_GPR_r9());
+  __ popl(Encoded_GPR_r10());
+  __ popl(Encoded_GPR_r11());
+  __ popl(Encoded_GPR_r12());
+  __ popl(Encoded_GPR_r13());
+  __ popl(Encoded_GPR_r14());
+  __ popl(Encoded_GPR_r15());
+
+  // Seven single-byte instructions, then eight two-byte (prefix + opcode) ones.
+  constexpr size_t ExpectedSize = 23;
+  ASSERT_EQ(ExpectedSize, codeBytesSize());
+
+  // Expected form: (PopBase | register) for the legacy registers; for r8-r15,
+  // the RexB prefix and then (PopBase | low three bits of the register).
+  constexpr uint8_t RexB = 0x41;
+  constexpr uint8_t PopBase = 0x58;
+  verifyBytes<ExpectedSize>(
+      codeBytes(),
+      PopBase | Encoded_GPR_eax(),
+      PopBase | Encoded_GPR_ebx(),
+      PopBase | Encoded_GPR_ecx(),
+      PopBase | Encoded_GPR_edx(),
+      PopBase | Encoded_GPR_edi(),
+      PopBase | Encoded_GPR_esi(),
+      PopBase | Encoded_GPR_ebp(),
+      RexB, PopBase | (Encoded_GPR_r8() & 7),
+      RexB, PopBase | (Encoded_GPR_r9() & 7),
+      RexB, PopBase | (Encoded_GPR_r10() & 7),
+      RexB, PopBase | (Encoded_GPR_r11() & 7),
+      RexB, PopBase | (Encoded_GPR_r12() & 7),
+      RexB, PopBase | (Encoded_GPR_r13() & 7),
+      RexB, PopBase | (Encoded_GPR_r14() & 7),
+      RexB, PopBase | (Encoded_GPR_r15() & 7));
+}
+
+// Checks the encoding of pushl for seven legacy registers (esp is not part of
+// the list) followed by r8-r15.
+TEST_F(AssemblerX8664LowLevelTest, PushRegs) {
+  __ pushl(Encoded_GPR_eax());
+  __ pushl(Encoded_GPR_ebx());
+  __ pushl(Encoded_GPR_ecx());
+  __ pushl(Encoded_GPR_edx());
+  __ pushl(Encoded_GPR_edi());
+  __ pushl(Encoded_GPR_esi());
+  __ pushl(Encoded_GPR_ebp());
+  __ pushl(Encoded_GPR_r8());
+  __ pushl(Encoded_GPR_r9());
+  __ pushl(Encoded_GPR_r10());
+  __ pushl(Encoded_GPR_r11());
+  __ pushl(Encoded_GPR_r12());
+  __ pushl(Encoded_GPR_r13());
+  __ pushl(Encoded_GPR_r14());
+  __ pushl(Encoded_GPR_r15());
+
+  // Seven single-byte instructions, then eight two-byte (prefix + opcode) ones.
+  constexpr size_t ExpectedSize = 23;
+  ASSERT_EQ(ExpectedSize, codeBytesSize());
+
+  // Expected form: (PushBase | register) for the legacy registers; for r8-r15,
+  // the RexB prefix and then (PushBase | low three bits of the register).
+  constexpr uint8_t RexB = 0x41;
+  constexpr uint8_t PushBase = 0x50;
+  verifyBytes<ExpectedSize>(
+      codeBytes(),
+      PushBase | Encoded_GPR_eax(),
+      PushBase | Encoded_GPR_ebx(),
+      PushBase | Encoded_GPR_ecx(),
+      PushBase | Encoded_GPR_edx(),
+      PushBase | Encoded_GPR_edi(),
+      PushBase | Encoded_GPR_esi(),
+      PushBase | Encoded_GPR_ebp(),
+      RexB, PushBase | (Encoded_GPR_r8() & 7),
+      RexB, PushBase | (Encoded_GPR_r9() & 7),
+      RexB, PushBase | (Encoded_GPR_r10() & 7),
+      RexB, PushBase | (Encoded_GPR_r11() & 7),
+      RexB, PushBase | (Encoded_GPR_r12() & 7),
+      RexB, PushBase | (Encoded_GPR_r13() & 7),
+      RexB, PushBase | (Encoded_GPR_r14() & 7),
+      RexB, PushBase | (Encoded_GPR_r15() & 7));
+}
+
+// Checks the encoding of a 32-bit "mov reg, 0" for seven legacy registers (esp
+// is not part of the list) followed by all of r8-r15. Each instruction is
+// expected to be (MovOpcode | low three bits of the register) followed by the
+// four bytes of the immediate; r8-r15 additionally carry the Rex_B prefix.
+//
+// r9 used to be missing from this list even though the sibling PopRegs and
+// PushRegs tests cover it; it is included here so that the whole r8-r15 range
+// is exercised.
+TEST_F(AssemblerX8664LowLevelTest, MovRegisterZero) {
+  __ mov(IceType_i32, Encoded_GPR_eax(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_ebx(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_ecx(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_edx(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_edi(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_esi(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_ebp(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r8(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r9(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r10(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r11(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r12(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r13(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r14(), Immediate(0x00));
+  __ mov(IceType_i32, Encoded_GPR_r15(), Immediate(0x00));
+
+  constexpr uint8_t Rex_B = 0x41;
+  constexpr size_t MovReg32BitImmBytes = 5;
+  // Derive the size from the register counts so that the instruction list and
+  // the expected size cannot silently drift apart.
+  constexpr size_t LegacyRegCount = 7;
+  constexpr size_t ExtendedRegCount = 8;
+  constexpr size_t ByteCount =
+      (LegacyRegCount + ExtendedRegCount) * MovReg32BitImmBytes +
+      ExtendedRegCount /*Rex_B*/;
+
+  ASSERT_EQ(ByteCount, codeBytesSize());
+
+  constexpr uint8_t MovOpcode = 0xb8;
+  verifyBytes<ByteCount>(
+      codeBytes(), MovOpcode | Encoded_GPR_eax(), 0x00, 0x00, 0x00, 0x00,
+      MovOpcode | Encoded_GPR_ebx(), 0x00, 0x00, 0x00, 0x00,
+      MovOpcode | Encoded_GPR_ecx(), 0x00, 0x00, 0x00, 0x00,
+      MovOpcode | Encoded_GPR_edx(), 0x00, 0x00, 0x00, 0x00,
+      MovOpcode | Encoded_GPR_edi(), 0x00, 0x00, 0x00, 0x00,
+      MovOpcode | Encoded_GPR_esi(), 0x00, 0x00, 0x00, 0x00,
+      MovOpcode | Encoded_GPR_ebp(), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r8() & 7), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r9() & 7), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r10() & 7), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r11() & 7), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r12() & 7), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r13() & 7), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r14() & 7), 0x00, 0x00, 0x00, 0x00, Rex_B,
+      MovOpcode | (Encoded_GPR_r15() & 7), 0x00, 0x00, 0x00, 0x00);
+}
+
+TEST_F(AssemblerX8664LowLevelTest, Cmp) {
+#define TestRegReg(Inst, Dst, Src, OpType, ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Src ", " #OpType ", " #ByteCountUntyped \
+ ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, Encoded_GPR_##Dst(), Encoded_GPR_##Src()); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestRegImm(Inst, Dst, Imm, OpType, ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Imm ", " #OpType ", " #ByteCountUntyped \
+ ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, Encoded_GPR_##Dst(), Immediate(Imm)); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestRegAbsoluteAddr(Inst, Dst, Disp, OpType, ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Disp ", " #OpType ", " #ByteCountUntyped \
+ ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, Encoded_GPR_##Dst(), \
+ Address(Address::ABSOLUTE, Disp)); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestRegAddrBase(Inst, Dst, Base, Disp, OpType, ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Base ", " #Disp ", " #OpType \
+ ", " #ByteCountUntyped ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, Encoded_GPR_##Dst(), \
+ Address(Encoded_GPR_##Base(), Disp)); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestRegAddrScaledIndex(Inst, Dst, Index, Scale, Disp, OpType, \
+ ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Index ", " #Scale ", " #Disp ", " #OpType \
+ ", " #ByteCountUntyped ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, Encoded_GPR_##Dst(), \
+ Address(Encoded_GPR_##Index(), Traits::TIMES_##Scale, Disp)); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestRegAddrBaseScaledIndex(Inst, Dst, Base, Index, Scale, Disp, \
+ OpType, ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Base ", " #Index ", " #Scale ", " #Disp \
+ ", " #OpType ", " #ByteCountUntyped ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, Encoded_GPR_##Dst(), \
+ Address(Encoded_GPR_##Base(), Encoded_GPR_##Index(), \
+ Traits::TIMES_##Scale, Disp)); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestAddrBaseScaledIndexImm(Inst, Base, Index, Scale, Disp, Imm, \
+ OpType, ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Base ", " #Index ", " #Scale ", " #Disp ", " #Imm \
+ ", " #OpType ", " #ByteCountUntyped ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, \
+ Address(Encoded_GPR_##Base(), Encoded_GPR_##Index(), \
+ Traits::TIMES_##Scale, Disp), \
+ Immediate(Imm)); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestAddrBaseScaledIndexReg(Inst, Base, Index, Scale, Disp, Src, \
+ OpType, ByteCountUntyped, ...) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Base ", " #Index ", " #Scale ", " #Disp ", " #Src \
+ ", " #OpType ", " #ByteCountUntyped ", " #__VA_ARGS__ ")"; \
+ static constexpr uint8_t ByteCount = ByteCountUntyped; \
+ __ Inst(IceType_##OpType, \
+ Address(Encoded_GPR_##Base(), Encoded_GPR_##Index(), \
+ Traits::TIMES_##Scale, Disp), \
+ Encoded_GPR_##Src()); \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), __VA_ARGS__)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ /* cmp GPR, GPR */
+ TestRegReg(cmp, eax, ecx, i32, 2, 0x3B, 0xC1);
+ TestRegReg(cmp, ecx, edx, i32, 2, 0x3B, 0xCA);
+ TestRegReg(cmp, edx, ebx, i32, 2, 0x3B, 0xD3);
+ TestRegReg(cmp, ebx, esp, i32, 2, 0x3B, 0xDC);
+ TestRegReg(cmp, esp, ebp, i32, 2, 0x3B, 0xE5);
+ TestRegReg(cmp, ebp, esi, i32, 2, 0x3B, 0xEE);
+ TestRegReg(cmp, esi, edi, i32, 2, 0x3B, 0xF7);
+ TestRegReg(cmp, edi, r8, i32, 3, 0x41, 0x3B, 0xF8);
+ TestRegReg(cmp, r8, r9, i32, 3, 0x45, 0x3B, 0xC1);
+ TestRegReg(cmp, r9, r10, i32, 3, 0x45, 0x3B, 0xCA);
+ TestRegReg(cmp, r10, r11, i32, 3, 0x45, 0x3B, 0xD3);
+ TestRegReg(cmp, r11, r12, i32, 3, 0x45, 0x3B, 0xDC);
+ TestRegReg(cmp, r12, r13, i32, 3, 0x45, 0x3B, 0xE5);
+ TestRegReg(cmp, r13, r14, i32, 3, 0x45, 0x3B, 0xEE);
+ TestRegReg(cmp, r14, r15, i32, 3, 0x45, 0x3B, 0xF7);
+ TestRegReg(cmp, r15, eax, i32, 3, 0x44, 0x3B, 0xF8);
+
+ TestRegReg(cmp, eax, ecx, i16, 3, 0x66, 0x3B, 0xC1);
+ TestRegReg(cmp, ecx, edx, i16, 3, 0x66, 0x3B, 0xCA);
+ TestRegReg(cmp, edx, ebx, i16, 3, 0x66, 0x3B, 0xD3);
+ TestRegReg(cmp, ebx, esp, i16, 3, 0x66, 0x3B, 0xDC);
+ TestRegReg(cmp, esp, ebp, i16, 3, 0x66, 0x3B, 0xE5);
+ TestRegReg(cmp, ebp, esi, i16, 3, 0x66, 0x3B, 0xEE);
+ TestRegReg(cmp, esi, edi, i16, 3, 0x66, 0x3B, 0xF7);
+ TestRegReg(cmp, edi, r8, i16, 4, 0x66, 0x41, 0x3B, 0xF8);
+ TestRegReg(cmp, r8, r9, i16, 4, 0x66, 0x45, 0x3B, 0xC1);
+ TestRegReg(cmp, r9, r10, i16, 4, 0x66, 0x45, 0x3B, 0xCA);
+ TestRegReg(cmp, r10, r11, i16, 4, 0x66, 0x45, 0x3B, 0xD3);
+ TestRegReg(cmp, r11, r12, i16, 4, 0x66, 0x45, 0x3B, 0xDC);
+ TestRegReg(cmp, r12, r13, i16, 4, 0x66, 0x45, 0x3B, 0xE5);
+ TestRegReg(cmp, r13, r14, i16, 4, 0x66, 0x45, 0x3B, 0xEE);
+ TestRegReg(cmp, r14, r15, i16, 4, 0x66, 0x45, 0x3B, 0xF7);
+ TestRegReg(cmp, r15, eax, i16, 4, 0x66, 0x44, 0x3B, 0xF8);
+
+ TestRegReg(cmp, eax, ecx, i8, 2, 0x3A, 0xC1);
+ TestRegReg(cmp, ecx, edx, i8, 2, 0x3A, 0xCA);
+ TestRegReg(cmp, edx, ebx, i8, 2, 0x3A, 0xD3);
+ TestRegReg(cmp, ebx, esp, i8, 3, 0x40, 0x3A, 0xDC);
+ TestRegReg(cmp, esp, ebp, i8, 3, 0x40, 0x3A, 0xE5);
+ TestRegReg(cmp, ebp, esi, i8, 3, 0x40, 0x3A, 0xEE);
+ TestRegReg(cmp, esi, edi, i8, 3, 0x40, 0x3A, 0xF7);
+ TestRegReg(cmp, edi, r8, i8, 3, 0x41, 0x3A, 0xF8);
+ TestRegReg(cmp, r8, r9, i8, 3, 0x45, 0x3A, 0xC1);
+ TestRegReg(cmp, r9, r10, i8, 3, 0x45, 0x3A, 0xCA);
+ TestRegReg(cmp, r10, r11, i8, 3, 0x45, 0x3A, 0xD3);
+ TestRegReg(cmp, r11, r12, i8, 3, 0x45, 0x3A, 0xDC);
+ TestRegReg(cmp, r12, r13, i8, 3, 0x45, 0x3A, 0xE5);
+ TestRegReg(cmp, r13, r14, i8, 3, 0x45, 0x3A, 0xEE);
+ TestRegReg(cmp, r14, r15, i8, 3, 0x45, 0x3A, 0xF7);
+ TestRegReg(cmp, r15, eax, i8, 3, 0x44, 0x3A, 0xF8);
+
+ /* cmp GPR, Imm8 */
+ TestRegImm(cmp, eax, 5, i32, 3, 0x83, 0xF8, 0x05);
+ TestRegImm(cmp, ecx, 5, i32, 3, 0x83, 0xF9, 0x05);
+ TestRegImm(cmp, edx, 5, i32, 3, 0x83, 0xFA, 0x05);
+ TestRegImm(cmp, ebx, 5, i32, 3, 0x83, 0xFB, 0x05);
+ TestRegImm(cmp, esp, 5, i32, 3, 0x83, 0xFC, 0x05);
+ TestRegImm(cmp, ebp, 5, i32, 3, 0x83, 0xFD, 0x05);
+ TestRegImm(cmp, esi, 5, i32, 3, 0x83, 0xFE, 0x05);
+ TestRegImm(cmp, edi, 5, i32, 3, 0x83, 0xFF, 0x05);
+ TestRegImm(cmp, r8, 5, i32, 4, 0x41, 0x83, 0xF8, 0x05);
+ TestRegImm(cmp, r9, 5, i32, 4, 0x41, 0x83, 0xF9, 0x05);
+ TestRegImm(cmp, r10, 5, i32, 4, 0x41, 0x83, 0xFA, 0x05);
+ TestRegImm(cmp, r11, 5, i32, 4, 0x41, 0x83, 0xFB, 0x05);
+ TestRegImm(cmp, r12, 5, i32, 4, 0x41, 0x83, 0xFC, 0x05);
+ TestRegImm(cmp, r13, 5, i32, 4, 0x41, 0x83, 0xFD, 0x05);
+ TestRegImm(cmp, r14, 5, i32, 4, 0x41, 0x83, 0xFE, 0x05);
+ TestRegImm(cmp, r15, 5, i32, 4, 0x41, 0x83, 0xFF, 0x05);
+
+ TestRegImm(cmp, eax, 5, i16, 4, 0x66, 0x83, 0xF8, 0x05);
+ TestRegImm(cmp, ecx, 5, i16, 4, 0x66, 0x83, 0xF9, 0x05);
+ TestRegImm(cmp, edx, 5, i16, 4, 0x66, 0x83, 0xFA, 0x05);
+ TestRegImm(cmp, ebx, 5, i16, 4, 0x66, 0x83, 0xFB, 0x05);
+ TestRegImm(cmp, esp, 5, i16, 4, 0x66, 0x83, 0xFC, 0x05);
+ TestRegImm(cmp, ebp, 5, i16, 4, 0x66, 0x83, 0xFD, 0x05);
+ TestRegImm(cmp, esi, 5, i16, 4, 0x66, 0x83, 0xFE, 0x05);
+ TestRegImm(cmp, edi, 5, i16, 4, 0x66, 0x83, 0xFF, 0x05);
+ TestRegImm(cmp, r8, 5, i16, 5, 0x66, 0x41, 0x83, 0xF8, 0x05);
+ TestRegImm(cmp, r9, 5, i16, 5, 0x66, 0x41, 0x83, 0xF9, 0x05);
+ TestRegImm(cmp, r10, 5, i16, 5, 0x66, 0x41, 0x83, 0xFA, 0x05);
+ TestRegImm(cmp, r11, 5, i16, 5, 0x66, 0x41, 0x83, 0xFB, 0x05);
+ TestRegImm(cmp, r12, 5, i16, 5, 0x66, 0x41, 0x83, 0xFC, 0x05);
+ TestRegImm(cmp, r13, 5, i16, 5, 0x66, 0x41, 0x83, 0xFD, 0x05);
+ TestRegImm(cmp, r14, 5, i16, 5, 0x66, 0x41, 0x83, 0xFE, 0x05);
+ TestRegImm(cmp, r15, 5, i16, 5, 0x66, 0x41, 0x83, 0xFF, 0x05);
+
+ TestRegImm(cmp, eax, 5, i8, 2, 0x3C, 0x05);
+ TestRegImm(cmp, ecx, 5, i8, 3, 0x80, 0xF9, 0x05);
+ TestRegImm(cmp, edx, 5, i8, 3, 0x80, 0xFA, 0x05);
+ TestRegImm(cmp, ebx, 5, i8, 3, 0x80, 0xFB, 0x05);
+ TestRegImm(cmp, esp, 5, i8, 4, 0x40, 0x80, 0xFC, 0x05);
+ TestRegImm(cmp, ebp, 5, i8, 4, 0x40, 0x80, 0xFD, 0x05);
+ TestRegImm(cmp, esi, 5, i8, 4, 0x40, 0x80, 0xFE, 0x05);
+ TestRegImm(cmp, edi, 5, i8, 4, 0x40, 0x80, 0xFF, 0x05);
+ TestRegImm(cmp, r8, 5, i8, 4, 0x41, 0x80, 0xF8, 0x05);
+ TestRegImm(cmp, r9, 5, i8, 4, 0x41, 0x80, 0xF9, 0x05);
+ TestRegImm(cmp, r10, 5, i8, 4, 0x41, 0x80, 0xFA, 0x05);
+ TestRegImm(cmp, r11, 5, i8, 4, 0x41, 0x80, 0xFB, 0x05);
+ TestRegImm(cmp, r12, 5, i8, 4, 0x41, 0x80, 0xFC, 0x05);
+ TestRegImm(cmp, r13, 5, i8, 4, 0x41, 0x80, 0xFD, 0x05);
+ TestRegImm(cmp, r14, 5, i8, 4, 0x41, 0x80, 0xFE, 0x05);
+ TestRegImm(cmp, r15, 5, i8, 4, 0x41, 0x80, 0xFF, 0x05);
+
+ /* cmp GPR, Imm16 */
+ TestRegImm(cmp, eax, 0x100, i32, 5, 0x3D, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, ecx, 0x100, i32, 6, 0x81, 0xF9, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, edx, 0x100, i32, 6, 0x81, 0xFA, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, ebx, 0x100, i32, 6, 0x81, 0xFB, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, esp, 0x100, i32, 6, 0x81, 0xFC, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, ebp, 0x100, i32, 6, 0x81, 0xFD, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, esi, 0x100, i32, 6, 0x81, 0xFE, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, edi, 0x100, i32, 6, 0x81, 0xFF, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r8, 0x100, i32, 7, 0x41, 0x81, 0xF8, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r9, 0x100, i32, 7, 0x41, 0x81, 0xF9, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r10, 0x100, i32, 7, 0x41, 0x81, 0xFA, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r11, 0x100, i32, 7, 0x41, 0x81, 0xFB, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r12, 0x100, i32, 7, 0x41, 0x81, 0xFC, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r13, 0x100, i32, 7, 0x41, 0x81, 0xFD, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r14, 0x100, i32, 7, 0x41, 0x81, 0xFE, 0x00, 0x01, 0x00, 0x00);
+ TestRegImm(cmp, r15, 0x100, i32, 7, 0x41, 0x81, 0xFF, 0x00, 0x01, 0x00, 0x00);
+
+ TestRegImm(cmp, eax, 0x100, i16, 4, 0x66, 0x3D, 0x00, 0x01);
+ TestRegImm(cmp, ecx, 0x100, i16, 5, 0x66, 0x81, 0xF9, 0x00, 0x01);
+ TestRegImm(cmp, edx, 0x100, i16, 5, 0x66, 0x81, 0xFA, 0x00, 0x01);
+ TestRegImm(cmp, ebx, 0x100, i16, 5, 0x66, 0x81, 0xFB, 0x00, 0x01);
+ TestRegImm(cmp, esp, 0x100, i16, 5, 0x66, 0x81, 0xFC, 0x00, 0x01);
+ TestRegImm(cmp, ebp, 0x100, i16, 5, 0x66, 0x81, 0xFD, 0x00, 0x01);
+ TestRegImm(cmp, esi, 0x100, i16, 5, 0x66, 0x81, 0xFE, 0x00, 0x01);
+ TestRegImm(cmp, edi, 0x100, i16, 5, 0x66, 0x81, 0xFF, 0x00, 0x01);
+ TestRegImm(cmp, r8, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xF8, 0x00, 0x01);
+ TestRegImm(cmp, r9, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xF9, 0x00, 0x01);
+ TestRegImm(cmp, r10, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xFA, 0x00, 0x01);
+ TestRegImm(cmp, r11, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xFB, 0x00, 0x01);
+ TestRegImm(cmp, r12, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xFC, 0x00, 0x01);
+ TestRegImm(cmp, r13, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xFD, 0x00, 0x01);
+ TestRegImm(cmp, r14, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xFE, 0x00, 0x01);
+ TestRegImm(cmp, r15, 0x100, i16, 6, 0x66, 0x41, 0x81, 0xFF, 0x00, 0x01);
+
+ /* cmp GPR, Absolute */
+ TestRegAbsoluteAddr(cmp, eax, 0xF00FBEEF, i32, 6, 0x3B, 0x05, 0xEF, 0xBE,
+ 0x0F, 0xF0);
+ TestRegAbsoluteAddr(cmp, eax, 0xF00FBEEF, i16, 7, 0x66, 0x3B, 0x05, 0xEF,
+ 0xBE, 0x0F, 0xF0);
+ TestRegAbsoluteAddr(cmp, eax, 0xF00FBEEF, i8, 6, 0x3A, 0x05, 0xEF, 0xBE, 0x0F,
+ 0xF0);
+ TestRegAbsoluteAddr(cmp, r8, 0xF00FBEEF, i32, 7, 0x44, 0x3B, 0x05, 0xEF, 0xBE,
+ 0x0F, 0xF0);
+ TestRegAbsoluteAddr(cmp, r8, 0xF00FBEEF, i16, 8, 0x66, 0x44, 0x3B, 0x05, 0xEF,
+ 0xBE, 0x0F, 0xF0);
+ TestRegAbsoluteAddr(cmp, r8, 0xF00FBEEF, i8, 7, 0x44, 0x3A, 0x05, 0xEF, 0xBE,
+ 0x0F, 0xF0);
+
+ /* cmp GPR, 0(Base) */
+ TestRegAddrBase(cmp, eax, ecx, 0, i32, 2, 0x3B, 0x01);
+ TestRegAddrBase(cmp, ecx, edx, 0, i32, 2, 0x3B, 0x0A);
+ TestRegAddrBase(cmp, edx, ebx, 0, i32, 2, 0x3B, 0x13);
+ TestRegAddrBase(cmp, ebx, esp, 0, i32, 3, 0x3B, 0x1C, 0x24);
+ TestRegAddrBase(cmp, esp, ebp, 0, i32, 3, 0x3B, 0x65, 0x00);
+ TestRegAddrBase(cmp, ebp, esi, 0, i32, 2, 0x3B, 0x2E);
+ TestRegAddrBase(cmp, esi, edi, 0, i32, 2, 0x3B, 0x37);
+ TestRegAddrBase(cmp, edi, r8, 0, i32, 3, 0x41, 0x3B, 0x38);
+ TestRegAddrBase(cmp, r8, r9, 0, i32, 3, 0x45, 0x3B, 0x01);
+ TestRegAddrBase(cmp, r9, r10, 0, i32, 3, 0x45, 0x3B, 0x0A);
+ TestRegAddrBase(cmp, r10, r11, 0, i32, 3, 0x45, 0x3B, 0x13);
+ TestRegAddrBase(cmp, r11, r12, 0, i32, 4, 0x45, 0x3B, 0x1C, 0x24);
+ TestRegAddrBase(cmp, r12, r13, 0, i32, 4, 0x45, 0x3B, 0x65, 0x00);
+ TestRegAddrBase(cmp, r13, r14, 0, i32, 3, 0x45, 0x3B, 0x2E);
+ TestRegAddrBase(cmp, r14, r15, 0, i32, 3, 0x45, 0x3B, 0x37);
+ TestRegAddrBase(cmp, r15, eax, 0, i32, 3, 0x44, 0x3B, 0x38);
+
+ TestRegAddrBase(cmp, eax, ecx, 0, i16, 3, 0x66, 0x3B, 0x01);
+ TestRegAddrBase(cmp, ecx, edx, 0, i16, 3, 0x66, 0x3B, 0x0A);
+ TestRegAddrBase(cmp, edx, ebx, 0, i16, 3, 0x66, 0x3B, 0x13);
+ TestRegAddrBase(cmp, ebx, esp, 0, i16, 4, 0x66, 0x3B, 0x1C, 0x24);
+ TestRegAddrBase(cmp, esp, ebp, 0, i16, 4, 0x66, 0x3B, 0x65, 0x00);
+ TestRegAddrBase(cmp, ebp, esi, 0, i16, 3, 0x66, 0x3B, 0x2E);
+ TestRegAddrBase(cmp, esi, edi, 0, i16, 3, 0x66, 0x3B, 0x37);
+ TestRegAddrBase(cmp, edi, r8, 0, i16, 4, 0x66, 0x41, 0x3B, 0x38);
+ TestRegAddrBase(cmp, r8, r9, 0, i16, 4, 0x66, 0x45, 0x3B, 0x01);
+ TestRegAddrBase(cmp, r9, r10, 0, i16, 4, 0x66, 0x45, 0x3B, 0x0A);
+ TestRegAddrBase(cmp, r10, r11, 0, i16, 4, 0x66, 0x45, 0x3B, 0x13);
+ TestRegAddrBase(cmp, r11, r12, 0, i16, 5, 0x66, 0x45, 0x3B, 0x1C, 0x24);
+ TestRegAddrBase(cmp, r12, r13, 0, i16, 5, 0x66, 0x45, 0x3B, 0x65, 0x00);
+ TestRegAddrBase(cmp, r13, r14, 0, i16, 4, 0x66, 0x45, 0x3B, 0x2E);
+ TestRegAddrBase(cmp, r14, r15, 0, i16, 4, 0x66, 0x45, 0x3B, 0x37);
+ TestRegAddrBase(cmp, r15, eax, 0, i16, 4, 0x66, 0x44, 0x3B, 0x38);
+
+ TestRegAddrBase(cmp, eax, ecx, 0, i8, 2, 0x3A, 0x01);
+ TestRegAddrBase(cmp, ecx, edx, 0, i8, 2, 0x3A, 0x0A);
+ TestRegAddrBase(cmp, edx, ebx, 0, i8, 2, 0x3A, 0x13);
+ TestRegAddrBase(cmp, ebx, esp, 0, i8, 3, 0x3A, 0x1C, 0x24);
+ TestRegAddrBase(cmp, esp, ebp, 0, i8, 4, 0x40, 0x3A, 0x65, 0x00);
+ TestRegAddrBase(cmp, ebp, esi, 0, i8, 3, 0x40, 0x3A, 0x2E);
+ TestRegAddrBase(cmp, esi, edi, 0, i8, 3, 0x40, 0x3A, 0x37);
+ TestRegAddrBase(cmp, edi, r8, 0, i8, 3, 0x41, 0x3A, 0x38);
+ TestRegAddrBase(cmp, r8, r9, 0, i8, 3, 0x45, 0x3A, 0x01);
+ TestRegAddrBase(cmp, r9, r10, 0, i8, 3, 0x45, 0x3A, 0x0A);
+ TestRegAddrBase(cmp, r10, r11, 0, i8, 3, 0x45, 0x3A, 0x13);
+ TestRegAddrBase(cmp, r11, r12, 0, i8, 4, 0x45, 0x3A, 0x1C, 0x24);
+ TestRegAddrBase(cmp, r12, r13, 0, i8, 4, 0x45, 0x3A, 0x65, 0x00);
+ TestRegAddrBase(cmp, r13, r14, 0, i8, 3, 0x45, 0x3A, 0x2E);
+ TestRegAddrBase(cmp, r14, r15, 0, i8, 3, 0x45, 0x3A, 0x37);
+ TestRegAddrBase(cmp, r15, eax, 0, i8, 3, 0x44, 0x3A, 0x38);
+
+ /* cmp GPR, Imm8(Base) */
+ TestRegAddrBase(cmp, eax, ecx, 0x40, i32, 3, 0x3B, 0x41, 0x40);
+ TestRegAddrBase(cmp, ecx, edx, 0x40, i32, 3, 0x3B, 0x4A, 0x40);
+ TestRegAddrBase(cmp, edx, ebx, 0x40, i32, 3, 0x3B, 0x53, 0x40);
+ TestRegAddrBase(cmp, ebx, esp, 0x40, i32, 4, 0x3B, 0x5C, 0x24, 0x40);
+ TestRegAddrBase(cmp, esp, ebp, 0x40, i32, 3, 0x3B, 0x65, 0x40);
+ TestRegAddrBase(cmp, ebp, esi, 0x40, i32, 3, 0x3B, 0x6E, 0x40);
+ TestRegAddrBase(cmp, esi, edi, 0x40, i32, 3, 0x3B, 0x77, 0x40);
+ TestRegAddrBase(cmp, edi, r8, 0x40, i32, 4, 0x41, 0x3B, 0x78, 0x40);
+ TestRegAddrBase(cmp, r8, r9, 0x40, i32, 4, 0x45, 0x3B, 0x41, 0x40);
+ TestRegAddrBase(cmp, r9, r10, 0x40, i32, 4, 0x45, 0x3B, 0x4A, 0x40);
+ TestRegAddrBase(cmp, r10, r11, 0x40, i32, 4, 0x45, 0x3B, 0x53, 0x40);
+ TestRegAddrBase(cmp, r11, r12, 0x40, i32, 5, 0x45, 0x3B, 0x5C, 0x24, 0x40);
+ TestRegAddrBase(cmp, r12, r13, 0x40, i32, 4, 0x45, 0x3B, 0x65, 0x40);
+ TestRegAddrBase(cmp, r13, r14, 0x40, i32, 4, 0x45, 0x3B, 0x6E, 0x40);
+ TestRegAddrBase(cmp, r14, r15, 0x40, i32, 4, 0x45, 0x3B, 0x77, 0x40);
+ TestRegAddrBase(cmp, r15, eax, 0x40, i32, 4, 0x44, 0x3B, 0x78, 0x40);
+
+ TestRegAddrBase(cmp, eax, ecx, 0x40, i16, 4, 0x66, 0x3B, 0x41, 0x40);
+ TestRegAddrBase(cmp, ecx, edx, 0x40, i16, 4, 0x66, 0x3B, 0x4A, 0x40);
+ TestRegAddrBase(cmp, edx, ebx, 0x40, i16, 4, 0x66, 0x3B, 0x53, 0x40);
+ TestRegAddrBase(cmp, ebx, esp, 0x40, i16, 5, 0x66, 0x3B, 0x5C, 0x24, 0x40);
+ TestRegAddrBase(cmp, esp, ebp, 0x40, i16, 4, 0x66, 0x3B, 0x65, 0x40);
+ TestRegAddrBase(cmp, ebp, esi, 0x40, i16, 4, 0x66, 0x3B, 0x6E, 0x40);
+ TestRegAddrBase(cmp, esi, edi, 0x40, i16, 4, 0x66, 0x3B, 0x77, 0x40);
+ TestRegAddrBase(cmp, edi, r8, 0x40, i16, 5, 0x66, 0x41, 0x3B, 0x78, 0x40);
+ TestRegAddrBase(cmp, r8, r9, 0x40, i16, 5, 0x66, 0x45, 0x3B, 0x41, 0x40);
+ TestRegAddrBase(cmp, r9, r10, 0x40, i16, 5, 0x66, 0x45, 0x3B, 0x4A, 0x40);
+ TestRegAddrBase(cmp, r10, r11, 0x40, i16, 5, 0x66, 0x45, 0x3B, 0x53, 0x40);
+ TestRegAddrBase(cmp, r11, r12, 0x40, i16, 6, 0x66, 0x45, 0x3B, 0x5C, 0x24,
+ 0x40);
+ TestRegAddrBase(cmp, r12, r13, 0x40, i16, 5, 0x66, 0x45, 0x3B, 0x65, 0x40);
+ TestRegAddrBase(cmp, r13, r14, 0x40, i16, 5, 0x66, 0x45, 0x3B, 0x6E, 0x40);
+ TestRegAddrBase(cmp, r14, r15, 0x40, i16, 5, 0x66, 0x45, 0x3B, 0x77, 0x40);
+ TestRegAddrBase(cmp, r15, eax, 0x40, i16, 5, 0x66, 0x44, 0x3B, 0x78, 0x40);
+
+ TestRegAddrBase(cmp, eax, ecx, 0x40, i8, 3, 0x3A, 0x41, 0x40);
+ TestRegAddrBase(cmp, ecx, edx, 0x40, i8, 3, 0x3A, 0x4A, 0x40);
+ TestRegAddrBase(cmp, edx, ebx, 0x40, i8, 3, 0x3A, 0x53, 0x40);
+ TestRegAddrBase(cmp, ebx, esp, 0x40, i8, 4, 0x3A, 0x5C, 0x24, 0x40);
+ TestRegAddrBase(cmp, esp, ebp, 0x40, i8, 4, 0x40, 0x3A, 0x65, 0x40);
+ TestRegAddrBase(cmp, ebp, esi, 0x40, i8, 4, 0x40, 0x3A, 0x6E, 0x40);
+ TestRegAddrBase(cmp, esi, edi, 0x40, i8, 4, 0x40, 0x3A, 0x77, 0x40);
+ TestRegAddrBase(cmp, edi, r8, 0x40, i8, 4, 0x41, 0x3A, 0x78, 0x40);
+ TestRegAddrBase(cmp, r8, r9, 0x40, i8, 4, 0x45, 0x3A, 0x41, 0x40);
+ TestRegAddrBase(cmp, r9, r10, 0x40, i8, 4, 0x45, 0x3A, 0x4A, 0x40);
+ TestRegAddrBase(cmp, r10, r11, 0x40, i8, 4, 0x45, 0x3A, 0x53, 0x40);
+ TestRegAddrBase(cmp, r11, r12, 0x40, i8, 5, 0x45, 0x3A, 0x5C, 0x24, 0x40);
+ TestRegAddrBase(cmp, r12, r13, 0x40, i8, 4, 0x45, 0x3A, 0x65, 0x40);
+ TestRegAddrBase(cmp, r13, r14, 0x40, i8, 4, 0x45, 0x3A, 0x6E, 0x40);
+ TestRegAddrBase(cmp, r14, r15, 0x40, i8, 4, 0x45, 0x3A, 0x77, 0x40);
+ TestRegAddrBase(cmp, r15, eax, 0x40, i8, 4, 0x44, 0x3A, 0x78, 0x40);
+
+ /* cmp GPR, Imm32(Base) */
+ TestRegAddrBase(cmp, eax, ecx, 0xF0, i32, 6, 0x3B, 0x81, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, ecx, edx, 0xF0, i32, 6, 0x3B, 0x8A, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, edx, ebx, 0xF0, i32, 6, 0x3B, 0x93, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, ebx, esp, 0xF0, i32, 7, 0x3B, 0x9C, 0x24, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, esp, ebp, 0xF0, i32, 6, 0x3B, 0xA5, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, ebp, esi, 0xF0, i32, 6, 0x3B, 0xAE, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, esi, edi, 0xF0, i32, 6, 0x3B, 0xB7, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, edi, r8, 0xF0, i32, 7, 0x41, 0x3B, 0xB8, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r8, r9, 0xF0, i32, 7, 0x45, 0x3B, 0x81, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, r9, r10, 0xF0, i32, 7, 0x45, 0x3B, 0x8A, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r10, r11, 0xF0, i32, 7, 0x45, 0x3B, 0x93, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r11, r12, 0xF0, i32, 8, 0x45, 0x3B, 0x9C, 0x24, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r12, r13, 0xF0, i32, 7, 0x45, 0x3B, 0xA5, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r13, r14, 0xF0, i32, 7, 0x45, 0x3B, 0xAE, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r14, r15, 0xF0, i32, 7, 0x45, 0x3B, 0xB7, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r15, eax, 0xF0, i32, 7, 0x44, 0x3B, 0xB8, 0xF0, 0x00,
+ 0x00, 0x00);
+
+ TestRegAddrBase(cmp, eax, ecx, 0xF0, i16, 7, 0x66, 0x3B, 0x81, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, ecx, edx, 0xF0, i16, 7, 0x66, 0x3B, 0x8A, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, edx, ebx, 0xF0, i16, 7, 0x66, 0x3B, 0x93, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, ebx, esp, 0xF0, i16, 8, 0x66, 0x3B, 0x9C, 0x24, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, esp, ebp, 0xF0, i16, 7, 0x66, 0x3B, 0xa5, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, ebp, esi, 0xF0, i16, 7, 0x66, 0x3B, 0xaE, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, esi, edi, 0xF0, i16, 7, 0x66, 0x3B, 0xb7, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, edi, r8, 0xF0, i16, 8, 0x66, 0x41, 0x3B, 0xb8, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r8, r9, 0xF0, i16, 8, 0x66, 0x45, 0x3B, 0x81, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r9, r10, 0xF0, i16, 8, 0x66, 0x45, 0x3B, 0x8A, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r10, r11, 0xF0, i16, 8, 0x66, 0x45, 0x3B, 0x93, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r11, r12, 0xF0, i16, 9, 0x66, 0x45, 0x3B, 0x9C, 0x24,
+ 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r12, r13, 0xF0, i16, 8, 0x66, 0x45, 0x3B, 0xa5, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r13, r14, 0xF0, i16, 8, 0x66, 0x45, 0x3B, 0xaE, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r14, r15, 0xF0, i16, 8, 0x66, 0x45, 0x3B, 0xb7, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r15, eax, 0xF0, i16, 8, 0x66, 0x44, 0x3B, 0xb8, 0xF0,
+ 0x00, 0x00, 0x00);
+
+ TestRegAddrBase(cmp, eax, ecx, 0xF0, i8, 6, 0x3A, 0x81, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, ecx, edx, 0xF0, i8, 6, 0x3A, 0x8A, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, edx, ebx, 0xF0, i8, 6, 0x3A, 0x93, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, ebx, esp, 0xF0, i8, 7, 0x3A, 0x9C, 0x24, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, esp, ebp, 0xF0, i8, 7, 0x40, 0x3A, 0xA5, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, ebp, esi, 0xF0, i8, 7, 0x40, 0x3A, 0xAE, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, esi, edi, 0xF0, i8, 7, 0x40, 0x3A, 0xB7, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, edi, r8, 0xF0, i8, 7, 0x41, 0x3A, 0xB8, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, r8, r9, 0xF0, i8, 7, 0x45, 0x3A, 0x81, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, r9, r10, 0xF0, i8, 7, 0x45, 0x3A, 0x8A, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestRegAddrBase(cmp, r10, r11, 0xF0, i8, 7, 0x45, 0x3A, 0x93, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r11, r12, 0xF0, i8, 8, 0x45, 0x3A, 0x9C, 0x24, 0xF0,
+ 0x00, 0x00, 0x00);
+ TestRegAddrBase(cmp, r12, r13, 0xF0, i8, 7, 0x45, 0x3A, 0xA5, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r13, r14, 0xF0, i8, 7, 0x45, 0x3A, 0xAE, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r14, r15, 0xF0, i8, 7, 0x45, 0x3A, 0xB7, 0xF0, 0x00,
+ 0x00, 0x00);
+ TestRegAddrBase(cmp, r15, eax, 0xF0, i8, 7, 0x44, 0x3A, 0xB8, 0xF0, 0x00,
+ 0x00, 0x00);
+
+ /* cmp GPR, Imm(,Index,Scale) */
+ TestRegAddrScaledIndex(cmp, eax, ecx, 1, 0, i32, 7, 0x3B, 0x04, 0x0D, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ecx, edx, 2, 0, i32, 7, 0x3B, 0x0C, 0x55, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, edx, ebx, 4, 0, i32, 7, 0x3B, 0x14, 0x9D, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r8, r9, 1, 0, i32, 8, 0x46, 0x3B, 0x04, 0x0D,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r9, r10, 2, 0, i32, 8, 0x46, 0x3B, 0x0C, 0x55,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r10, r11, 4, 0, i32, 8, 0x46, 0x3B, 0x14, 0x9D,
+ 0x00, 0x00, 0x00, 0x00);
+ // esp cannot be a scaled index.
+ TestRegAddrScaledIndex(cmp, esp, ebp, 8, 0, i32, 7, 0x3B, 0x24, 0xED, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ebp, esi, 1, 0, i32, 7, 0x3B, 0x2C, 0x35, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, esi, edi, 2, 0, i32, 7, 0x3B, 0x34, 0x7D, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, edi, eax, 4, 0, i32, 7, 0x3B, 0x3C, 0x85, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ebx, ecx, 8, 0, i32, 7, 0x3B, 0x1C, 0xCD, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r12, r13, 8, 0, i32, 8, 0x46, 0x3B, 0x24, 0xED,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r13, r14, 1, 0, i32, 8, 0x46, 0x3B, 0x2C, 0x35,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r14, r15, 2, 0, i32, 8, 0x46, 0x3B, 0x34, 0x7D,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r15, r8, 4, 0, i32, 8, 0x46, 0x3B, 0x3C, 0x85,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r11, r9, 8, 0, i32, 8, 0x46, 0x3B, 0x1C, 0xCD,
+ 0x00, 0x00, 0x00, 0x00);
+
+ TestRegAddrScaledIndex(cmp, eax, ecx, 8, 0, i16, 8, 0x66, 0x3B, 0x04, 0xCD,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ecx, edx, 1, 0, i16, 8, 0x66, 0x3B, 0x0C, 0x15,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, edx, ebx, 2, 0, i16, 8, 0x66, 0x3B, 0x14, 0x5D,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r8, r9, 8, 0, i16, 9, 0x66, 0x46, 0x3B, 0x04,
+ 0xCD, 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r9, r10, 1, 0, i16, 9, 0x66, 0x46, 0x3B, 0x0C,
+ 0x15, 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r10, r11, 2, 0, i16, 9, 0x66, 0x46, 0x3B, 0x14,
+ 0x5D, 0x00, 0x00, 0x00, 0x00);
+ // esp cannot be a scaled index.
+ TestRegAddrScaledIndex(cmp, esp, ebp, 4, 0, i16, 8, 0x66, 0x3B, 0x24, 0xAD,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ebp, esi, 8, 0, i16, 8, 0x66, 0x3B, 0x2C, 0xF5,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, esi, edi, 1, 0, i16, 8, 0x66, 0x3B, 0x34, 0x3D,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, edi, eax, 2, 0, i16, 8, 0x66, 0x3B, 0x3C, 0x45,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ebx, ecx, 8, 0, i16, 8, 0x66, 0x3B, 0x1C, 0xCD,
+ 0x00, 0x00, 0x00, 0x00);
+
+ TestRegAddrScaledIndex(cmp, eax, ecx, 4, 0, i8, 7, 0x3A, 0x04, 0x8D, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ecx, edx, 8, 0, i8, 7, 0x3A, 0x0C, 0xD5, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, edx, ebx, 1, 0, i8, 7, 0x3A, 0x14, 0x1D, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r8, r9, 4, 0, i8, 8, 0x46, 0x3A, 0x04, 0x8D, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r9, r10, 8, 0, i8, 8, 0x46, 0x3A, 0x0C, 0xD5,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r10, r11, 1, 0, i8, 8, 0x46, 0x3A, 0x14, 0x1D,
+ 0x00, 0x00, 0x00, 0x00);
+ // esp cannot be a scaled index.
+ TestRegAddrScaledIndex(cmp, esp, ebp, 2, 0, i8, 8, 0x40, 0x3A, 0x24, 0x6D,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ebp, esi, 4, 0, i8, 8, 0x40, 0x3A, 0x2C, 0xB5,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, esi, edi, 8, 0, i8, 8, 0x40, 0x3A, 0x34, 0xFD,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, edi, eax, 1, 0, i8, 8, 0x40, 0x3A, 0x3C, 0x05,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, ebx, ecx, 8, 0, i8, 7, 0x3a, 0x1C, 0xCD, 0x00,
+ 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r12, r13, 2, 0, i8, 8, 0x46, 0x3A, 0x24, 0x6D,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r13, r14, 4, 0, i8, 8, 0x46, 0x3A, 0x2C, 0xB5,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r14, r15, 8, 0, i8, 8, 0x46, 0x3A, 0x34, 0xFD,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r15, r8, 1, 0, i8, 8, 0x46, 0x3A, 0x3C, 0x05,
+ 0x00, 0x00, 0x00, 0x00);
+ TestRegAddrScaledIndex(cmp, r11, r9, 8, 0, i8, 8, 0x46, 0x3a, 0x1C, 0xCD,
+ 0x00, 0x00, 0x00, 0x00);
+
+ /* cmp GPR, 0(Base,Index,Scale) */
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0, i32, 3, 0x3B, 0x04,
+ 0x11);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0, i32, 3, 0x3B, 0x0C,
+ 0x5A);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0, i32, 4, 0x47, 0x3B, 0x04,
+ 0x11);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0, i32, 4, 0x47, 0x3B, 0x0C,
+ 0x5A);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0, i32, 3, 0x3B, 0x1C,
+ 0xAC);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0, i32, 4, 0x3B, 0x64, 0xF5,
+ 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0, i32, 3, 0x3B, 0x2C,
+ 0x3E);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0, i32, 3, 0x3B, 0x34,
+ 0x47);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0, i32, 3, 0x3B, 0x3C,
+ 0x98);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0, i32, 3, 0x3B, 0x1C,
+ 0xD1);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0, i32, 4, 0x47, 0x3B, 0x1C,
+ 0xAC);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0, i32, 5, 0x47, 0x3B, 0x64,
+ 0xF5, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0, i32, 4, 0x47, 0x3B, 0x2C,
+ 0x3E);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0, i32, 4, 0x47, 0x3B, 0x34,
+ 0x47);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0, i32, 4, 0x47, 0x3B, 0x3C,
+ 0x98);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0, i32, 4, 0x47, 0x3B, 0x1C,
+ 0xD1);
+
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0, i16, 4, 0x66, 0x3B, 0x04,
+ 0x11);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0, i16, 4, 0x66, 0x3B, 0x0C,
+ 0x5A);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0, i16, 5, 0x66, 0x47, 0x3B,
+ 0x04, 0x11);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0, i16, 5, 0x66, 0x47, 0x3B,
+ 0x0C, 0x5A);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0, i16, 4, 0x66, 0x3B, 0x1C,
+ 0xAC);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0, i16, 5, 0x66, 0x3B, 0x64,
+ 0xF5, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0, i16, 4, 0x66, 0x3B, 0x2C,
+ 0x3E);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0, i16, 4, 0x66, 0x3B, 0x34,
+ 0x47);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0, i16, 4, 0x66, 0x3B, 0x3C,
+ 0x98);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0, i16, 4, 0x66, 0x3B, 0x1C,
+ 0xD1);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0, i16, 5, 0x66, 0x47, 0x3B,
+ 0x1C, 0xAC);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0, i16, 6, 0x66, 0x47, 0x3B,
+ 0x64, 0xF5, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0, i16, 5, 0x66, 0x47, 0x3B,
+ 0x2C, 0x3E);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0, i16, 5, 0x66, 0x47, 0x3B,
+ 0x34, 0x47);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0, i16, 5, 0x66, 0x47, 0x3B,
+ 0x3C, 0x98);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0, i16, 5, 0x66, 0x47, 0x3B,
+ 0x1C, 0xD1);
+
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0, i8, 3, 0x3A, 0x04, 0x11);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0, i8, 3, 0x3A, 0x0C, 0x5A);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0, i8, 4, 0x47, 0x3A, 0x04,
+ 0x11);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0, i8, 4, 0x47, 0x3A, 0x0C,
+ 0x5A);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0, i8, 3, 0x3A, 0x1C, 0xAC);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0, i8, 5, 0x40, 0x3A, 0x64,
+ 0xF5, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0, i8, 4, 0x40, 0x3A, 0x2C,
+ 0x3E);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0, i8, 4, 0x40, 0x3A, 0x34,
+ 0x47);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0, i8, 4, 0x40, 0x3A, 0x3C,
+ 0x98);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0, i8, 3, 0x3A, 0x1C, 0xD1);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0, i8, 4, 0x47, 0x3A, 0x1C,
+ 0xAC);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0, i8, 5, 0x47, 0x3A, 0x64,
+ 0xF5, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0, i8, 4, 0x47, 0x3A, 0x2C,
+ 0x3E);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0, i8, 4, 0x47, 0x3A, 0x34,
+ 0x47);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0, i8, 4, 0x47, 0x3A, 0x3C,
+ 0x98);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0, i8, 4, 0x47, 0x3A, 0x1C,
+ 0xD1);
+
+ /* cmp GPR, Imm8(Base,Index,Scale) */
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0x40, i32, 4, 0x3B, 0x44,
+ 0x11, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0x40, i32, 4, 0x3B, 0x4C,
+ 0x5A, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0x40, i32, 5, 0x47, 0x3B,
+ 0x44, 0x11, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0x40, i32, 5, 0x47, 0x3B,
+ 0x4C, 0x5A, 0x40);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0x40, i32, 4, 0x3B, 0x5C,
+ 0xAC, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0x40, i32, 4, 0x3B, 0x64,
+ 0xF5, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0x40, i32, 4, 0x3B, 0x6C,
+ 0x3E, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0x40, i32, 4, 0x3B, 0x74,
+ 0x47, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0x40, i32, 4, 0x3B, 0x7C,
+ 0x98, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0x40, i32, 4, 0x3B, 0x5C,
+ 0xD1, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0x40, i32, 5, 0x47, 0x3B,
+ 0x5C, 0xAC, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0x40, i32, 5, 0x47, 0x3B,
+ 0x64, 0xF5, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0x40, i32, 5, 0x47, 0x3B,
+ 0x6C, 0x3E, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0x40, i32, 5, 0x47, 0x3B,
+ 0x74, 0x47, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0x40, i32, 5, 0x47, 0x3B,
+ 0x7C, 0x98, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0x40, i32, 5, 0x47, 0x3B,
+ 0x5C, 0xD1, 0x40);
+
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0x40, i16, 5, 0x66, 0x3B,
+ 0x44, 0x11, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0x40, i16, 5, 0x66, 0x3B,
+ 0x4C, 0x5A, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x44, 0x11, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x4C, 0x5A, 0x40);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0x40, i16, 5, 0x66, 0x3B,
+ 0x5C, 0xAC, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0x40, i16, 5, 0x66, 0x3B,
+ 0x64, 0xF5, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0x40, i16, 5, 0x66, 0x3B,
+ 0x6C, 0x3E, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0x40, i16, 5, 0x66, 0x3B,
+ 0x74, 0x47, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0x40, i16, 5, 0x66, 0x3B,
+ 0x7C, 0x98, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0x40, i16, 5, 0x66, 0x3B,
+ 0x5C, 0xD1, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x5C, 0xAC, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x64, 0xF5, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x6C, 0x3E, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x74, 0x47, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x7C, 0x98, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0x40, i16, 6, 0x66, 0x47,
+ 0x3B, 0x5C, 0xD1, 0x40);
+
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0x40, i8, 4, 0x3A, 0x44,
+ 0x11, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0x40, i8, 4, 0x3A, 0x4C,
+ 0x5A, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0x40, i8, 5, 0x47, 0x3A, 0x44,
+ 0x11, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0x40, i8, 5, 0x47, 0x3A,
+ 0x4C, 0x5A, 0x40);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0x40, i8, 4, 0x3A, 0x5C,
+ 0xAC, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0x40, i8, 5, 0x40, 0x3A,
+ 0x64, 0xF5, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0x40, i8, 5, 0x40, 0x3A,
+ 0x6C, 0x3E, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0x40, i8, 5, 0x40, 0x3A,
+ 0x74, 0x47, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0x40, i8, 5, 0x40, 0x3A,
+ 0x7C, 0x98, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0x40, i8, 4, 0x3A, 0x5C,
+ 0xD1, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0x40, i8, 5, 0x47, 0x3A,
+ 0x5C, 0xAC, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0x40, i8, 5, 0x47, 0x3A,
+ 0x64, 0xF5, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0x40, i8, 5, 0x47, 0x3A,
+ 0x6C, 0x3E, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0x40, i8, 5, 0x47, 0x3A,
+ 0x74, 0x47, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0x40, i8, 5, 0x47, 0x3A,
+ 0x7C, 0x98, 0x40);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0x40, i8, 5, 0x47, 0x3A,
+ 0x5C, 0xD1, 0x40);
+
+ /* cmp GPR, Imm32(Base,Index,Scale) */
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0xF0, i32, 7, 0x3B, 0x84,
+ 0x11, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0xF0, i32, 7, 0x3B, 0x8C,
+ 0x5A, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0xF0, i32, 8, 0x47, 0x3B,
+ 0x84, 0x11, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0xF0, i32, 8, 0x47, 0x3B,
+ 0x8C, 0x5A, 0xF0, 0x00, 0x00, 0x00);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0xF0, i32, 7, 0x3B, 0x9C,
+ 0xAC, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0xF0, i32, 7, 0x3B, 0xA4,
+ 0xF5, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0xF0, i32, 7, 0x3B, 0xAC,
+ 0x3E, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0xF0, i32, 7, 0x3B, 0xB4,
+ 0x47, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0xF0, i32, 7, 0x3B, 0xBC,
+ 0x98, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0xF0, i32, 7, 0x3B, 0x9C,
+ 0xD1, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0xF0, i32, 8, 0x47, 0x3B,
+ 0x9C, 0xAC, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0xF0, i32, 8, 0x47, 0x3B,
+ 0xA4, 0xF5, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0xF0, i32, 8, 0x47, 0x3B,
+ 0xAC, 0x3E, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0xF0, i32, 8, 0x47, 0x3B,
+ 0xB4, 0x47, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0xF0, i32, 8, 0x47, 0x3B,
+ 0xBC, 0x98, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0xF0, i32, 8, 0x47, 0x3B,
+ 0x9C, 0xD1, 0xF0, 0x00, 0x00, 0x00);
+
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0xF0, i16, 8, 0x66, 0x3B,
+ 0x84, 0x11, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0xF0, i16, 8, 0x66, 0x3B,
+ 0x8C, 0x5A, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0x84, 0x11, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0x8C, 0x5A, 0xF0, 0x00, 0x00, 0x00);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0xF0, i16, 8, 0x66, 0x3B,
+ 0x9C, 0xAC, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0xF0, i16, 8, 0x66, 0x3B,
+ 0xA4, 0xF5, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0xF0, i16, 8, 0x66, 0x3B,
+ 0xAC, 0x3E, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0xF0, i16, 8, 0x66, 0x3B,
+ 0xB4, 0x47, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0xF0, i16, 8, 0x66, 0x3B,
+ 0xBC, 0x98, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0xF0, i16, 8, 0x66, 0x3B,
+ 0x9C, 0xD1, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0x9C, 0xAC, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0xA4, 0xF5, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0xAC, 0x3E, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0xB4, 0x47, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0xBC, 0x98, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0xF0, i16, 9, 0x66, 0x47,
+ 0x3B, 0x9C, 0xD1, 0xF0, 0x00, 0x00, 0x00);
+
+ TestRegAddrBaseScaledIndex(cmp, eax, ecx, edx, 1, 0xF0, i8, 7, 0x3A, 0x84,
+ 0x11, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ecx, edx, ebx, 2, 0xF0, i8, 7, 0x3A, 0x8C,
+ 0x5A, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r8, r9, r10, 1, 0xF0, i8, 8, 0x47, 0x3A, 0x84,
+ 0x11, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r9, r10, r11, 2, 0xF0, i8, 8, 0x47, 0x3A,
+ 0x8C, 0x5A, 0xF0, 0x00, 0x00, 0x00);
+ // esp cannot be a scaled index.
+ TestRegAddrBaseScaledIndex(cmp, ebx, esp, ebp, 4, 0xF0, i8, 7, 0x3A, 0x9C,
+ 0xAC, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, esp, ebp, esi, 8, 0xF0, i8, 8, 0x40, 0x3A,
+ 0xA4, 0xF5, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebp, esi, edi, 1, 0xF0, i8, 8, 0x40, 0x3A,
+ 0xAC, 0x3E, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, esi, edi, eax, 2, 0xF0, i8, 8, 0x40, 0x3A,
+ 0xB4, 0x47, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, edi, eax, ebx, 4, 0xF0, i8, 8, 0x40, 0x3A,
+ 0xBC, 0x98, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, ebx, ecx, edx, 8, 0xF0, i8, 7, 0x3A, 0x9C,
+ 0xD1, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r11, r12, r13, 4, 0xF0, i8, 8, 0x47, 0x3A,
+ 0x9C, 0xAC, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r12, r13, r14, 8, 0xF0, i8, 8, 0x47, 0x3A,
+ 0xA4, 0xF5, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r13, r14, r15, 1, 0xF0, i8, 8, 0x47, 0x3A,
+ 0xAC, 0x3E, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r14, r15, r8, 2, 0xF0, i8, 8, 0x47, 0x3A,
+ 0xB4, 0x47, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r15, r8, r11, 4, 0xF0, i8, 8, 0x47, 0x3A,
+ 0xBC, 0x98, 0xF0, 0x00, 0x00, 0x00);
+ TestRegAddrBaseScaledIndex(cmp, r11, r9, r10, 8, 0xF0, i8, 8, 0x47, 0x3A,
+ 0x9C, 0xD1, 0xF0, 0x00, 0x00, 0x00);
+
+ /* cmp Addr, Imm */
+ // Note: at this point we trust the assembler knows how to encode addresses,
+ // so no more exhaustive addressing mode testing.
+ TestAddrBaseScaledIndexImm(cmp, eax, ecx, 1, 0xF0, 0x12, i32, 8, 0x83, 0xBC,
+ 0x08, 0xF0, 0x00, 0x00, 0x00, 0x12);
+ TestAddrBaseScaledIndexImm(cmp, ecx, edx, 1, 0xF0, 0xF0, i32, 11, 0x81, 0xBC,
+ 0x11, 0xF0, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x00,
+ 0x00);
+ TestAddrBaseScaledIndexImm(cmp, r8, r9, 1, 0xF0, 0x12, i32, 9, 0x43, 0x83,
+ 0xBC, 0x08, 0xF0, 0x00, 0x00, 0x00, 0x12);
+ TestAddrBaseScaledIndexImm(cmp, r9, r10, 1, 0xF0, 0xF0, i32, 12, 0x43, 0x81,
+ 0xBC, 0x11, 0xF0, 0x00, 0x00, 0x00, 0xF0, 0x00,
+ 0x00, 0x00);
+
+ TestAddrBaseScaledIndexImm(cmp, eax, ecx, 1, 0xF0, 0x12, i16, 9, 0x66, 0x83,
+ 0xBC, 0x08, 0xF0, 0x00, 0x00, 0x00, 0x12);
+ TestAddrBaseScaledIndexImm(cmp, ecx, edx, 1, 0xF0, 0xF0, i16, 10, 0x66, 0x81,
+ 0xBC, 0x11, 0xF0, 0x00, 0x00, 0x00, 0xF0, 0x00);
+ TestAddrBaseScaledIndexImm(cmp, r8, r9, 1, 0xF0, 0x12, i16, 10, 0x66, 0x43,
+ 0x83, 0xBC, 0x08, 0xF0, 0x00, 0x00, 0x00, 0x12);
+ TestAddrBaseScaledIndexImm(cmp, r9, r10, 1, 0xF0, 0xF0, i16, 11, 0x66, 0x43,
+ 0x81, 0xBC, 0x11, 0xF0, 0x00, 0x00, 0x00, 0xF0,
+ 0x00);
+
+ TestAddrBaseScaledIndexImm(cmp, eax, ecx, 1, 0xF0, 0x12, i8, 8, 0x80, 0xBC,
+ 0x08, 0xF0, 0x00, 0x00, 0x00, 0x12);
+ TestAddrBaseScaledIndexImm(cmp, r8, r9, 1, 0xF0, 0x12, i8, 9, 0x43, 0x80,
+ 0xBC, 0x08, 0xF0, 0x00, 0x00, 0x00, 0x12);
+
+ /* cmp Addr, GPR */
+ TestAddrBaseScaledIndexReg(cmp, eax, ecx, 1, 0xF0, edx, i32, 7, 0x39, 0x94,
+ 0x08, 0xF0, 0x00, 0x00, 0x00);
+ TestAddrBaseScaledIndexReg(cmp, r8, r9, 1, 0xF0, r10, i32, 8, 0x47, 0x39,
+ 0x94, 0x08, 0xF0, 0x00, 0x00, 0x00);
+
+ TestAddrBaseScaledIndexReg(cmp, eax, ecx, 1, 0xF0, edx, i16, 8, 0x66, 0x39,
+ 0x94, 0x08, 0xF0, 0x00, 0x00, 0x00);
+ TestAddrBaseScaledIndexReg(cmp, r8, r9, 1, 0xF0, r10, i16, 9, 0x66, 0x47,
+ 0x39, 0x94, 0x08, 0xF0, 0x00, 0x00, 0x00);
+
+ TestAddrBaseScaledIndexReg(cmp, eax, ecx, 1, 0xF0, edx, i8, 7, 0x38, 0x94,
+ 0x08, 0xF0, 0x00, 0x00, 0x00);
+ TestAddrBaseScaledIndexReg(cmp, r8, r9, 1, 0xF0, r10, i8, 8, 0x47, 0x38, 0x94,
+ 0x08, 0xF0, 0x00, 0x00, 0x00);
+
+#undef TestAddrBaseScaledIndexReg
+#undef TestAddrBaseScaledIndexImm
+#undef TestRegAddrBaseScaledIndex
+#undef TestRegAddrScaledIndex
+#undef TestRegAddrBase
+#undef TestRegAbsoluteAddr
+#undef TestRegImm
+#undef TestRegReg
+}
+
+// After these tests we should have a sane environment; we know the following
+// work:
+//
+// (*) zeroing eax, ebx, ecx, edx, edi, and esi;
+// (*) call $4 instruction (used for ip materialization);
+// (*) register push and pop;
+// (*) cmp reg, reg; and
+// (*) returning from functions.
+//
+// We can now dive into testing each emitting method in AssemblerX8664. Each
+// test will emit some instructions for performing the test. The assembled
+// instructions will operate in a "safe" environment. All x86-64 registers are
+// spilled to the program stack, and the registers are then zeroed out, with the
+// exception of %rsp and %r9.
+//
+// The jitted code and the unittest code will share the same stack. Therefore,
+// each test harness needs to ensure it does not leave anything it pushed on
+// the stack.
+//
+// %r9 is initialized with a pointer into the jitted code. This pointer is
+// used for position-independent access to a scratchpad area for use in tests.
+// In theory we could use rip-based addressing, but in practice that would
+// require creating fixups, which would, in turn, require creating a global
+// context. We therefore rely on the same technique used for pic code in x86-32
+// (i.e., IP materialization). Upon test start-up, a call(NextInstruction) is
+// executed. We then pop the return address from the stack, and use it for pic
+// addressing.
+//
+// The jitted code will look like the following:
+//
+// test:
+// push %r9
+// call test$materialize_ip
+// test$materialize_ip: <<------- %r9 will point here
+// pop %r9
+// push %rax
+// push %rbx
+// push %rcx
+// push %rdx
+// push %rbp
+// push %rdi
+// push %rsi
+// push %r8
+// push %r10
+// push %r11
+// push %r12
+// push %r13
+// push %r14
+// push %r15
+// mov $0, %rax
+// mov $0, %rbx
+// mov $0, %rcx
+// mov $0, %rdx
+// mov $0, %rbp
+// mov $0, %rdi
+// mov $0, %rsi
+// mov $0, %r8
+// mov $0, %r10
+// mov $0, %r11
+// mov $0, %r12
+// mov $0, %r13
+// mov $0, %r14
+// mov $0, %r15
+//
+// << test code goes here >>
+//
+// mov %rax, { 0 + $ScratchpadOffset}(%r9)
+// mov %rbx, { 8 + $ScratchpadOffset}(%r9)
+// mov %rcx, { 16 + $ScratchpadOffset}(%r9)
+// mov %rdx, { 24 + $ScratchpadOffset}(%r9)
+// mov %rdi, { 32 + $ScratchpadOffset}(%r9)
+// mov %rsi, { 40 + $ScratchpadOffset}(%r9)
+// mov %rbp, { 48 + $ScratchpadOffset}(%r9)
+// mov %rsp, { 56 + $ScratchpadOffset}(%r9)
+// mov %r8, { 64 + $ScratchpadOffset}(%r9)
+// mov %r9, { 72 + $ScratchpadOffset}(%r9)
+// mov %r10, { 80 + $ScratchpadOffset}(%r9)
+// mov %r11, { 88 + $ScratchpadOffset}(%r9)
+// mov %r12, { 96 + $ScratchpadOffset}(%r9)
+// mov %r13, {104 + $ScratchpadOffset}(%r9)
+// mov %r14, {112 + $ScratchpadOffset}(%r9)
+// mov %r15, {120 + $ScratchpadOffset}(%r9)
+// movups %xmm0, {128 + $ScratchpadOffset}(%r9)
+// movups %xmm1, {144 + $ScratchpadOffset}(%r9)
+// movups %xmm2, {160 + $ScratchpadOffset}(%r9)
+// movups %xmm3, {176 + $ScratchpadOffset}(%r9)
+// movups %xmm4, {192 + $ScratchpadOffset}(%r9)
+// movups %xmm5, {208 + $ScratchpadOffset}(%r9)
+// movups %xmm6, {224 + $ScratchpadOffset}(%r9)
+// movups %xmm7, {240 + $ScratchpadOffset}(%r9)
+// movups %xmm8, {256 + $ScratchpadOffset}(%r9)
+// movups %xmm9, {272 + $ScratchpadOffset}(%r9)
+// movups %xmm10, {288 + $ScratchpadOffset}(%r9)
+// movups %xmm11, {304 + $ScratchpadOffset}(%r9)
+// movups %xmm12, {320 + $ScratchpadOffset}(%r9)
+// movups %xmm13, {336 + $ScratchpadOffset}(%r9)
+// movups %xmm14, {352 + $ScratchpadOffset}(%r9)
+// movups %xmm15, {368 + $ScratchpadOffset}(%r9)
+//
+// pop %r15
+// pop %r14
+// pop %r13
+// pop %r12
+// pop %r11
+// pop %r10
+// pop %r8
+// pop %rsi
+// pop %rdi
+// pop %rbp
+// pop %rdx
+// pop %rcx
+// pop %rbx
+// pop %rax
+// pop %r9
+// ret
+//
+// << ... >>
+//
+// scratchpad: <<------- accessed via $Offset(%r9)
+//
+// << test scratch area >>
+//
+// TODO(jpp): test the
+//
+// mov %reg, $Offset(%r9)
+// movups %xmm, $Offset(%r9)
+//
+// encodings using the low level assembler test ensuring that the register
+// values can be written to the scratchpad area.
+//
+// r9 was deliberately chosen so that every instruction accessing memory would
+// fail if the rex prefix was not emitted for it.
+class AssemblerX8664Test : public AssemblerX8664TestBase {
+protected:
+ // Dqword is used to represent 128-bit data types. The Dqword's contents are
+ // the same as the contents read from memory. Tests can then use the union
+ // members to verify the tests' outputs.
+ //
+ // NOTE: We want sizeof(Dqword) == sizeof(uint64_t) * 2. In other words, we
+ // want Dqword's contents to be **exactly** what the memory contents were so
+ // that we can do, e.g.,
+ //
+ // ...
+ // float Ret[4];
+ // // populate Ret
+ // return *reinterpret_cast<Dqword *>(&Ret);
+ //
+ // While being an ugly hack, this kind of return statement is used
+ // extensively in the PackedArith (see below) class.
+ union Dqword { // 128-bit value; every array member below aliases the same storage.
+ template <typename T0, typename T1, typename T2, typename T3,
+ typename = typename std::enable_if<
+ std::is_floating_point<T0>::value>::type> // Only T0 is constrained here.
+ Dqword(T0 F0, T1 F1, T2 F2, T3 F3) { // Four-element floating-point form: fills F32[0..3].
+ F32[0] = F0;
+ F32[1] = F1;
+ F32[2] = F2;
+ F32[3] = F3;
+ }
+
+ template <typename T> // T is deduced from I1..I3; I0 sits in a non-deduced context.
+ Dqword(typename std::enable_if<std::is_same<T, int32_t>::value, T>::type I0,
+ T I1, T I2, T I3) { // Enabled only when T is int32_t: fills I32[0..3].
+ I32[0] = I0;
+ I32[1] = I1;
+ I32[2] = I2;
+ I32[3] = I3;
+ }
+
+ template <typename T> // T is deduced from U64_1 only.
+ Dqword(typename std::enable_if<std::is_same<T, uint64_t>::value, T>::type
+ U64_0,
+ T U64_1) { // Enabled only when T is uint64_t: fills U64[0..1].
+ U64[0] = U64_0;
+ U64[1] = U64_1;
+ }
+
+ template <typename T> // T is deduced from D1 only.
+ Dqword(typename std::enable_if<std::is_same<T, double>::value, T>::type D0,
+ T D1) { // Enabled only when T is double: fills F64[0..1].
+ F64[0] = D0;
+ F64[1] = D1;
+ }
+
+ bool operator==(const Dqword &Rhs) const { // Bytewise equality over the whole union.
+ return std::memcmp(this, &Rhs, sizeof(*this)) == 0;
+ }
+
+ double F64[2]; // 64-bit element views.
+ uint64_t U64[2];
+ int64_t I64[2];
+
+ float F32[4]; // 32-bit element views.
+ uint32_t U32[4];
+ int32_t I32[4];
+
+ uint16_t U16[8]; // 16-bit element views.
+ int16_t I16[8];
+
+ uint8_t U8[16]; // 8-bit element views.
+ int8_t I8[16];
+
+ private:
+ Dqword() = delete; // No default construction; one of the constructors above must be used.
+ };
+
+ // As stated, we want this condition to hold, so we assert.
+ static_assert(sizeof(Dqword) == 2 * sizeof(uint64_t),
+ "Dqword has the wrong size.");
+
+ // PackedArith is an interface provider for Dqwords. PackedArith's C argument
+ // is the undelying Dqword's type, which is then used so that we can define
+ // operators in terms of C++ operators on the underlying elements' type.
+ template <typename C> class PackedArith {
+ public:
+ static constexpr uint32_t N = sizeof(Dqword) / sizeof(C);
+ static_assert(N * sizeof(C) == sizeof(Dqword),
+ "Invalid template paramenter.");
+ static_assert((N & 1) == 0, "N should be divisible by 2");
+
+ // DefinePackedComparisonOperator(Op) defines the element-wise comparison
+ // operator Op for floating point containers. Every result element is all
+ // ones (-1) when the comparison holds for the matching pair of elements, and
+ // zero otherwise.
+ //
+ // The elements are compared as floating point values (Container). They used
+ // to be compared as same-sized signed integers, which orders two negative
+ // values backwards and mishandles +0.0/-0.0 and NaNs. ElemType is only used
+ // for building the all-ones/all-zeros result elements.
+#define DefinePackedComparisonOperator(Op) \
+ template <typename Container = C, int Size = N> \
+ typename std::enable_if<std::is_floating_point<Container>::value, \
+                         Dqword>::type \
+ operator Op(const Dqword &Rhs) const { \
+   using ElemType = \
+       typename std::conditional<std::is_same<float, Container>::value, \
+                                 int32_t, int64_t>::type; \
+   static_assert(sizeof(ElemType) == sizeof(Container), \
+                 "Check ElemType definition."); \
+   const Container *const RhsPtr = \
+       reinterpret_cast<const Container *const>(&Rhs); \
+   const Container *const LhsPtr = \
+       reinterpret_cast<const Container *const>(&Lhs); \
+   ElemType Ret[N]; \
+   for (uint32_t i = 0; i < N; ++i) { \
+     Ret[i] = (LhsPtr[i] Op RhsPtr[i]) ? -1 : 0; \
+   } \
+   return *reinterpret_cast<Dqword *>(&Ret); \
+ }
+
+ DefinePackedComparisonOperator(< );
+ DefinePackedComparisonOperator(<= );
+ DefinePackedComparisonOperator(> );
+ DefinePackedComparisonOperator(>= );
+ DefinePackedComparisonOperator(== );
+ DefinePackedComparisonOperator(!= );
+
+#undef DefinePackedComparisonOperator
+
+ // DefinePackedOrdUnordComparisonOperator(Op, Ordered) defines the member
+ // function Op for floating point containers. A pair of elements is unordered
+ // when at least one of them compares different from itself. Every result
+ // element is all ones (-1) when the pair is ordered and Ordered is true, or
+ // when the pair is unordered and Ordered is false; it is zero otherwise.
+#define DefinePackedOrdUnordComparisonOperator(Op, Ordered) \
+ template <typename Container = C, int Size = N> \
+ typename std::enable_if<std::is_floating_point<Container>::value, \
+                         Dqword>::type \
+ Op(const Dqword &Rhs) const { \
+   using ElemType = \
+       typename std::conditional<std::is_same<float, Container>::value, \
+                                 int32_t, int64_t>::type; \
+   static_assert(sizeof(ElemType) == sizeof(Container), \
+                 "Check ElemType definition."); \
+   const Container *const LhsElems = \
+       reinterpret_cast<const Container *const>(&Lhs); \
+   const Container *const RhsElems = \
+       reinterpret_cast<const Container *const>(&Rhs); \
+   ElemType Ret[N]; \
+   for (uint32_t Lane = 0; Lane < N; ++Lane) { \
+     const bool Unordered = LhsElems[Lane] != LhsElems[Lane] || \
+                            RhsElems[Lane] != RhsElems[Lane]; \
+     if (Unordered != (Ordered)) { \
+       Ret[Lane] = -1; \
+     } else { \
+       Ret[Lane] = 0; \
+     } \
+   } \
+   return *reinterpret_cast<Dqword *>(&Ret); \
+ }
+
+ DefinePackedOrdUnordComparisonOperator(ord, true);
+ DefinePackedOrdUnordComparisonOperator(unord, false);
+#undef DefinePackedOrdUnordComparisonOperator
+
+ // DefinePackedArithOperator(Op, RhsIndexChanges, NeedsInt) defines the
+ // element-wise binary operator Op.
+ //
+ //   RhsIndexChanges: when true, element i of the left-hand side is combined
+ //                    with element i of Rhs; when false, every element is
+ //                    combined with element 0 of Rhs.
+ //   NeedsInt:        when true and the container is float/double, the
+ //                    elements are operated on as uint32_t/uint64_t.
+#define DefinePackedArithOperator(Op, RhsIndexChanges, NeedsInt) \
+ template <typename Container = C, int Size = N> \
+ Dqword operator Op(const Dqword &Rhs) const { \
+   using SameSizeUint = typename std::conditional< \
+       std::is_same<Container, float>::value, uint32_t, \
+       typename std::conditional<std::is_same<Container, double>::value, \
+                                 uint64_t, void>::type>::type; \
+   using ElemType = typename std::conditional< \
+       std::is_integral<Container>::value || !(NeedsInt), Container, \
+       SameSizeUint>::type; \
+   static_assert(!std::is_same<void, ElemType>::value, \
+                 "Check ElemType definition."); \
+   const ElemType *const LhsElems = \
+       reinterpret_cast<const ElemType *const>(&Lhs); \
+   const ElemType *const RhsElems = \
+       reinterpret_cast<const ElemType *const>(&Rhs); \
+   ElemType Ret[N]; \
+   for (uint32_t Lane = 0; Lane < N; ++Lane) { \
+     const uint32_t RhsLane = (RhsIndexChanges) ? Lane : 0; \
+     Ret[Lane] = LhsElems[Lane] Op RhsElems[RhsLane]; \
+   } \
+   return *reinterpret_cast<Dqword *>(&Ret); \
+ }
+
+ DefinePackedArithOperator(>>, false, true);
+ DefinePackedArithOperator(<<, false, true);
+ DefinePackedArithOperator(+, true, false);
+ DefinePackedArithOperator(-, true, false);
+ DefinePackedArithOperator(/, true, false);
+ DefinePackedArithOperator(&, true, true);
+ DefinePackedArithOperator(|, true, true);
+ DefinePackedArithOperator(^, true, true);
+
+#undef DefinePackedArithOperator
+
+ // DefinePackedArithShiftImm(Op) defines the shift operator Op taking an
+ // 8-bit immediate: every element of the container is shifted by imm.
+#define DefinePackedArithShiftImm(Op) \
+ template <typename Container = C, int Size = N> \
+ Dqword operator Op(uint8_t imm) const { \
+   Container Shifted[N]; \
+   const Container *const Elems = \
+       reinterpret_cast<const Container *const>(&Lhs); \
+   for (uint32_t Lane = 0; Lane < N; ++Lane) { \
+     Shifted[Lane] = Elems[Lane] Op imm; \
+   } \
+   return *reinterpret_cast<Dqword *>(&Shifted); \
+ }
+
+ DefinePackedArithShiftImm(>> );
+ DefinePackedArithShiftImm(<< );
+
+#undef DefinePackedArithShiftImm
+
+ // Element-wise multiplication for signed and floating point containers.
+ // Every product is stored back with the container's element type.
+ template <typename Container = C, int Size = N>
+ typename std::enable_if<std::is_signed<Container>::value ||
+                             std::is_floating_point<Container>::value,
+                         Dqword>::type
+ operator*(const Dqword &Rhs) const {
+   static_assert((std::is_integral<Container>::value &&
+                  sizeof(Container) < sizeof(uint64_t)) ||
+                     std::is_floating_point<Container>::value,
+                 "* is only defined for i(8|16|32), and fp types.");
+
+   const Container *const LhsElems =
+       reinterpret_cast<const Container *const>(&Lhs);
+   const Container *const RhsElems =
+       reinterpret_cast<const Container *const>(&Rhs);
+   Container Products[Size];
+   for (uint32_t Lane = 0; Lane < Size; ++Lane) {
+     Products[Lane] = LhsElems[Lane] * RhsElems[Lane];
+   }
+   return *reinterpret_cast<Dqword *>(&Products);
+ }
+
+ // Widening multiplication for unsigned containers: only the even-indexed
+ // elements are multiplied, and every product is stored in an element that is
+ // twice as wide as the container's element type.
+ template <typename Container = C, int Size = N,
+           typename = typename std::enable_if<
+               !std::is_signed<Container>::value>::type>
+ Dqword operator*(const Dqword &Rhs) const {
+   static_assert(std::is_integral<Container>::value &&
+                     sizeof(Container) < sizeof(uint64_t),
+                 "* is only defined for ui(8|16|32)");
+   using NextType = typename std::conditional<
+       sizeof(Container) == 1, uint16_t,
+       typename std::conditional<sizeof(Container) == 2, uint32_t,
+                                 uint64_t>::type>::type;
+   static_assert(sizeof(Container) * 2 == sizeof(NextType),
+                 "Unexpected size");
+
+   const Container *const LhsElems =
+       reinterpret_cast<const Container *const>(&Lhs);
+   const Container *const RhsElems =
+       reinterpret_cast<const Container *const>(&Rhs);
+   NextType Products[Size / 2];
+   for (uint32_t Lane = 0; Lane < Size; Lane += 2) {
+     const NextType WideLhs = static_cast<NextType>(LhsElems[Lane]);
+     const NextType WideRhs = static_cast<NextType>(RhsElems[Lane]);
+     Products[Lane / 2] = WideLhs * WideRhs;
+   }
+   return *reinterpret_cast<Dqword *>(&Products);
+ }
+
+ // Element-wise bitwise not. Unlike the binary operators, the result is a
+ // PackedArith, so it can be used directly as the left-hand side of another
+ // packed operation.
+ template <typename Container = C, int Size = N>
+ PackedArith<Container> operator~() const {
+   Container Flipped[Size];
+   const Container *const Elems =
+       reinterpret_cast<const Container *const>(&Lhs);
+   for (uint32_t Lane = 0; Lane < Size; ++Lane) {
+     Flipped[Lane] = ~Elems[Lane];
+   }
+   const Dqword &Result = *reinterpret_cast<Dqword *>(&Flipped);
+   return PackedArith<Container>(Result);
+ }
+
+ // MinMaxOperations(Name, Suffix) defines the member function NameSuffix
+ // (e.g., maxps), which applies std::Name to every pair of elements. It is
+ // only available for floating point containers.
+ //
+ // The static_assert message used to append "ps" after Name and Suffix, which
+ // produced names such as "maxpsps" and "maxpdps"; it now names the function
+ // that was actually instantiated.
+#define MinMaxOperations(Name, Suffix) \
+ template <typename Container = C, int Size = N> \
+ Dqword Name##Suffix(const Dqword &Rhs) const { \
+   static_assert(std::is_floating_point<Container>::value, \
+                 #Name #Suffix " is only available for fp."); \
+   const Container *const RhsPtr = \
+       reinterpret_cast<const Container *const>(&Rhs); \
+   const Container *const LhsPtr = \
+       reinterpret_cast<const Container *const>(&Lhs); \
+   Container Ret[Size]; \
+   for (uint32_t i = 0; i < Size; ++i) { \
+     Ret[i] = std::Name(LhsPtr[i], RhsPtr[i]); \
+   } \
+   return *reinterpret_cast<Dqword *>(&Ret); \
+ }
+
+ MinMaxOperations(max, ps);
+ MinMaxOperations(max, pd);
+ MinMaxOperations(min, ps);
+ MinMaxOperations(min, pd);
+#undef MinMaxOperations
+
+ // Element-wise select: the result takes the element from Rhs whenever the
+ // matching Mask element is negative when read as a signed integer of the
+ // same size (i.e., its top bit is set), and the element from this object's
+ // Dqword otherwise.
+ template <typename Container = C, int Size = N>
+ Dqword blendWith(const Dqword &Rhs, const Dqword &Mask) const {
+   using MaskType = typename std::conditional<
+       sizeof(Container) == 1, int8_t,
+       typename std::conditional<sizeof(Container) == 2, int16_t,
+                                 int32_t>::type>::type;
+   static_assert(sizeof(MaskType) == sizeof(Container),
+                 "MaskType has the wrong size.");
+   const Container *const LhsElems =
+       reinterpret_cast<const Container *const>(&Lhs);
+   const Container *const RhsElems =
+       reinterpret_cast<const Container *const>(&Rhs);
+   const MaskType *const MaskElems =
+       reinterpret_cast<const MaskType *const>(&Mask);
+   Container Blended[Size];
+   for (int Lane = 0; Lane < Size; ++Lane) {
+     const bool TakeRhs = MaskElems[Lane] < 0;
+     Blended[Lane] = TakeRhs ? RhsElems[Lane] : LhsElems[Lane];
+   }
+   return *reinterpret_cast<Dqword *>(&Blended);
+ }
+
+ private:
+ // The constructor is private, so AssemblerX8664Test is made a friend; it
+ // creates PackedArith objects through packedAs() (see below.)
+ friend class AssemblerX8664Test;
+
+ explicit PackedArith(const Dqword &MyLhs) : Lhs(MyLhs) {} // Copies MyLhs.
+
+ // Lhs is stored by value (not as a &) because operator~ returns a temporary
+ // object that needs access to its own Dqword.
+ const Dqword Lhs;
+ };
+
+ // Named constructor for PackedArith objects: wraps (a copy of) D so that it
+ // can be operated on as a vector of C elements.
+ template <typename C> static PackedArith<C> packedAs(const Dqword &D) {
+   PackedArith<C> View(D);
+   return View;
+ }
+
+ // A freshly constructed fixture is in the same state as a reset() one.
+ AssemblerX8664Test() { reset(); }
+
+ // Resets the base class, then starts a new test: the register-dump dwords
+ // are the only ones allocated, the epilogue is pending, and the prologue has
+ // been emitted.
+ void reset() {
+   AssemblerX8664TestBase::reset();
+
+   // The first ScratchpadSlots dwords are set aside for saving the register
+   // state after the jitted code runs.
+   NumAllocatedDwords = AssembledTest::ScratchpadSlots;
+   NeedsEpilogue = true;
+   addPrologue();
+ }
+
+ // AssembledTest is a wrapper around a PROT_EXEC mmap'ed buffer. This buffer
+ // contains both the test code as well as prologue/epilogue, and the
+ // scratchpad area that tests may use -- all tests use this scratchpad area
+ // for storing the processor's registers after the tests executed. This class
+ // also exposes helper methods for reading the register state after test
+ // execution, as well as for reading the scratchpad area.
+ class AssembledTest {
+ AssembledTest() = delete;
+ AssembledTest(const AssembledTest &) = delete;
+ AssembledTest &operator=(const AssembledTest &) = delete;
+
+ public:
+ static constexpr uint32_t MaximumCodeSize = 1 << 20; // Bytes reserved for code; dwordOffset() places dword 0 right after them.
+ static constexpr uint32_t raxSlot() { return 0; } // GPR slots are dword indexes, 2 dwords (64 bits) apart.
+ static constexpr uint32_t rbxSlot() { return 2; }
+ static constexpr uint32_t rcxSlot() { return 4; }
+ static constexpr uint32_t rdxSlot() { return 6; }
+ static constexpr uint32_t rdiSlot() { return 8; }
+ static constexpr uint32_t rsiSlot() { return 10; }
+ static constexpr uint32_t rbpSlot() { return 12; }
+ static constexpr uint32_t rspSlot() { return 14; }
+ static constexpr uint32_t r8Slot() { return 16; }
+ static constexpr uint32_t r9Slot() { return 18; }
+ static constexpr uint32_t r10Slot() { return 20; }
+ static constexpr uint32_t r11Slot() { return 22; }
+ static constexpr uint32_t r12Slot() { return 24; }
+ static constexpr uint32_t r13Slot() { return 26; }
+ static constexpr uint32_t r14Slot() { return 28; }
+ static constexpr uint32_t r15Slot() { return 30; }
+
+ // Each xmm register is saved in 4 consecutive dwords (128 bits).
+ static constexpr uint32_t xmm0Slot() { return 32; }
+ static constexpr uint32_t xmm1Slot() { return 36; }
+ static constexpr uint32_t xmm2Slot() { return 40; }
+ static constexpr uint32_t xmm3Slot() { return 44; }
+ static constexpr uint32_t xmm4Slot() { return 48; }
+ static constexpr uint32_t xmm5Slot() { return 52; }
+ static constexpr uint32_t xmm6Slot() { return 56; }
+ static constexpr uint32_t xmm7Slot() { return 60; }
+ static constexpr uint32_t xmm8Slot() { return 64; }
+ static constexpr uint32_t xmm9Slot() { return 68; }
+ static constexpr uint32_t xmm10Slot() { return 72; }
+ static constexpr uint32_t xmm11Slot() { return 76; }
+ static constexpr uint32_t xmm12Slot() { return 80; }
+ static constexpr uint32_t xmm13Slot() { return 84; }
+ static constexpr uint32_t xmm14Slot() { return 88; }
+ static constexpr uint32_t xmm15Slot() { return 92; }
+
+ static constexpr uint32_t ScratchpadSlots = 96; // Dwords taken by the register dump: 16 GPRs * 2 + 16 xmms * 4.
+
+ // Maps a private, anonymous, read/write/execute buffer that is
+ // MaximumCodeSize bytes long plus 4 bytes for each of the ExtraStorageDwords,
+ // and copies the MySize bytes found at Data to its start.
+ AssembledTest(const uint8_t *Data, const size_t MySize,
+               const size_t ExtraStorageDwords)
+     : Size(MaximumCodeSize + 4 * ExtraStorageDwords) {
+   // EXPECT_LT is given a named local instead of MaximumCodeSize itself. The
+   // original author suspected a compiler bug. NOTE(review): more likely the
+   // macro binds its operands by reference, which needs an out-of-class
+   // definition for a static constexpr member -- confirm.
+   uint32_t MaxCodeSize = MaximumCodeSize;
+   EXPECT_LT(MySize, MaxCodeSize);
+   assert(MySize < MaximumCodeSize);
+
+   const int Protection = PROT_WRITE | PROT_READ | PROT_EXEC;
+   const int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
+   ExecutableData = mmap(nullptr, Size, Protection, MapFlags, -1, 0);
+   EXPECT_NE(MAP_FAILED, ExecutableData) << strerror(errno);
+   assert(MAP_FAILED != ExecutableData);
+
+   std::memcpy(ExecutableData, Data, MySize);
+ }
+
+ // AssembledTest objects can be moved, which is what allows functions to
+ // return them. The moved-from object is left without a buffer.
+ AssembledTest(AssembledTest &&Buffer) : ExecutableData(nullptr), Size(0) {
+   ExecutableData = Buffer.ExecutableData;
+   Size = Buffer.Size;
+
+   Buffer.ExecutableData = nullptr;
+   Buffer.Size = 0;
+ }
+
+ // Move assignment: takes ownership of Buffer's mapping and leaves Buffer
+ // without one. The mapping this object owned (if any) is unmapped first; it
+ // used to be leaked. Self-assignment is a no-op; it used to null the pointer
+ // and leak the mapping.
+ AssembledTest &operator=(AssembledTest &&Buffer) {
+   if (this == &Buffer) {
+     return *this;
+   }
+   if (ExecutableData != nullptr) {
+     munmap(ExecutableData, Size);
+   }
+   ExecutableData = Buffer.ExecutableData;
+   Buffer.ExecutableData = nullptr;
+   Size = Buffer.Size;
+   Buffer.Size = 0;
+   return *this;
+ }
+
+ // Unmaps the buffer, unless this object does not own one (e.g., after being
+ // moved from).
+ ~AssembledTest() {
+   if (ExecutableData == nullptr) {
+     return;
+   }
+   munmap(ExecutableData, Size);
+   ExecutableData = nullptr;
+ }
+
+ // Calls the start of the mapped buffer as a function taking no arguments
+ // and returning nothing.
+ void run() const {
+   using TestEntry = void (*)();
+   const TestEntry Entry = reinterpret_cast<TestEntry>(ExecutableData);
+   Entry();
+ }
+
+#define LegacyRegAccessors(NewName, Name64, Name32, Name16, Name8) \
+ static_assert(Encoded_GPR_##NewName() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint64_t NewName() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##NewName##q() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint64_t NewName##q() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##NewName##d() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint32_t NewName##d() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##NewName##w() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint16_t NewName##w() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##NewName##l() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint8_t NewName##l() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##Name64() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint64_t Name64() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##Name32() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint32_t Name32() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##Name16() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint16_t Name16() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ } \
+ static_assert(Encoded_GPR_##Name8() == Encoded_GPR_##Name64(), \
+ "Invalid aliasing."); \
+ uint8_t Name8() const { \
+ return contentsOfQword(AssembledTest::Name64##Slot()); \
+ }
+#define NewRegAccessors(NewName) \
+ uint64_t NewName() const { \
+ return contentsOfQword(AssembledTest::NewName##Slot()); \
+ } \
+ uint64_t NewName##q() const { \
+ return contentsOfQword(AssembledTest::NewName##Slot()); \
+ } \
+ uint32_t NewName##d() const { \
+ return contentsOfQword(AssembledTest::NewName##Slot()); \
+ } \
+ uint16_t NewName##w() const { \
+ return contentsOfQword(AssembledTest::NewName##Slot()); \
+ } \
+ uint8_t NewName##l() const { \
+ return contentsOfQword(AssembledTest::NewName##Slot()); \
+ }
+#define XmmRegAccessor(Name) \
+ template <typename T> T Name() const { \
+ return xmm<T>(AssembledTest::Name##Slot()); \
+ }
+ LegacyRegAccessors(r0, rsp, esp, sp, spl);
+ LegacyRegAccessors(r1, rax, eax, ax, al);
+ LegacyRegAccessors(r2, rbx, ebx, bx, bl);
+ LegacyRegAccessors(r3, rcx, ecx, cx, cl);
+ LegacyRegAccessors(r4, rdx, edx, dx, dl);
+ LegacyRegAccessors(r5, rbp, ebp, bp, bpl);
+ LegacyRegAccessors(r6, rsi, esi, si, sil);
+ LegacyRegAccessors(r7, rdi, edi, di, dil);
+ NewRegAccessors(r8);
+ NewRegAccessors(r9);
+ NewRegAccessors(r10);
+ NewRegAccessors(r11);
+ NewRegAccessors(r12);
+ NewRegAccessors(r13);
+ NewRegAccessors(r14);
+ NewRegAccessors(r15);
+ XmmRegAccessor(xmm0);
+ XmmRegAccessor(xmm1);
+ XmmRegAccessor(xmm2);
+ XmmRegAccessor(xmm3);
+ XmmRegAccessor(xmm4);
+ XmmRegAccessor(xmm5);
+ XmmRegAccessor(xmm6);
+ XmmRegAccessor(xmm7);
+ XmmRegAccessor(xmm8);
+ XmmRegAccessor(xmm9);
+ XmmRegAccessor(xmm10);
+ XmmRegAccessor(xmm11);
+ XmmRegAccessor(xmm12);
+ XmmRegAccessor(xmm13);
+ XmmRegAccessor(xmm14);
+ XmmRegAccessor(xmm15);
+#undef XmmRegAccessor
+#undef NewRegAccessors
+#undef LegacyRegAccessors
+
+ // contentsOfDword is used for reading the values in the scratchpad area.
+ // Valid arguments are the dword ids returned by
+ // AssemblerX8664Test::allocateDword() -- other inputs are considered
+ // invalid, and are not guaranteed to work if the implementation changes.
+ //
+ // T must be exactly as large as a uint32_t. The bytes are copied out with
+ // memcpy instead of being read through a reinterpret_cast'ed T *, so the
+ // read does not depend on the type that was used for writing them.
+ template <typename T = uint32_t, typename = typename std::enable_if<
+                                        sizeof(T) == sizeof(uint32_t)>::type>
+ T contentsOfDword(uint32_t Dword) const {
+   T Value;
+   std::memcpy(&Value,
+               static_cast<const uint8_t *>(ExecutableData) +
+                   dwordOffset(Dword),
+               sizeof(Value));
+   return Value;
+ }
+
+ // Reads the qword that starts at dword InitialDword of the scratchpad area.
+ // T must be exactly as large as a uint64_t.
+ //
+ // dwordOffset() only guarantees 4-byte alignment (an odd InitialDword is not
+ // 8-byte aligned), so the bytes are copied out with memcpy instead of being
+ // read through a reinterpret_cast'ed T *.
+ template <typename T = uint64_t, typename = typename std::enable_if<
+                                        sizeof(T) == sizeof(uint64_t)>::type>
+ T contentsOfQword(uint32_t InitialDword) const {
+   T Value;
+   std::memcpy(&Value,
+               static_cast<const uint8_t *>(ExecutableData) +
+                   dwordOffset(InitialDword),
+               sizeof(Value));
+   return Value;
+ }
+
+ // Reads the 16 bytes that start at dword InitialDword of the scratchpad
+ // area. The bytes are copied with memcpy because dwordOffset() only
+ // guarantees 4-byte alignment, which is less than what a Dqword requires.
+ Dqword contentsOfDqword(uint32_t InitialDword) const {
+   // Dqword has no default constructor, so start from an all-zeros value.
+   Dqword Value(static_cast<uint64_t>(0), static_cast<uint64_t>(0));
+   std::memcpy(&Value,
+               static_cast<const uint8_t *>(ExecutableData) +
+                   dwordOffset(InitialDword),
+               sizeof(Value));
+   return Value;
+ }
+
+ // Writes value's 4 bytes to dword Dword of the scratchpad area. T must be
+ // exactly as large as a uint32_t.
+ //
+ // The bytes are copied with memcpy. Reading value through a
+ // reinterpret_cast'ed uint32_t * is not valid when T is, e.g., float.
+ template <typename T = uint32_t, typename = typename std::enable_if<
+                                        sizeof(T) == sizeof(uint32_t)>::type>
+ void setDwordTo(uint32_t Dword, T value) {
+   std::memcpy(static_cast<uint8_t *>(ExecutableData) + dwordOffset(Dword),
+               &value, sizeof(uint32_t));
+ }
+
+ // Writes value's 8 bytes starting at dword InitialDword of the scratchpad
+ // area. T must be exactly as large as a uint64_t.
+ //
+ // The bytes are copied with memcpy: reading value through a
+ // reinterpret_cast'ed uint64_t * is not valid when T is, e.g., double, and
+ // the destination is only guaranteed to be 4-byte aligned.
+ template <typename T = uint64_t, typename = typename std::enable_if<
+                                        sizeof(T) == sizeof(uint64_t)>::type>
+ void setQwordTo(uint32_t InitialDword, T value) {
+   std::memcpy(static_cast<uint8_t *>(ExecutableData) +
+                   dwordOffset(InitialDword),
+               &value, sizeof(uint64_t));
+ }
+
+ // Writes qdword to the scratchpad area as two consecutive qwords; the first
+ // one starts at dword InitialDword, the second one two dwords later.
+ void setDqwordTo(uint32_t InitialDword, const Dqword &qdword) {
+   for (uint32_t Half = 0; Half < 2; ++Half) {
+     setQwordTo(InitialDword + 2 * Half, qdword.U64[Half]);
+   }
+ }
+
+ private:
+ // xmm<Dqword>(Slot) returns all 16 bytes saved at Slot.
+ template <typename T>
+ typename std::enable_if<std::is_same<T, Dqword>::value, Dqword>::type
+ xmm(uint8_t Slot) const {
+   const uint32_t FirstDword = Slot;
+   return contentsOfDqword(FirstDword);
+ }
+
+ // For any other T, xmm<T>(Slot) returns the first qword saved at Slot when T
+ // is as large as a uint64_t, and the first dword otherwise. Both reads have
+ // to compile for every T, hence the helper types: the read that is not used
+ // for T is instantiated with an integer type of the right size.
+ template <typename T>
+ typename std::enable_if<!std::is_same<T, Dqword>::value, T>::type
+ xmm(uint8_t Slot) const {
+   constexpr bool TIs64Bit = sizeof(T) == sizeof(uint64_t);
+   using _64BitType = typename std::conditional<TIs64Bit, T, uint64_t>::type;
+   using _32BitType = typename std::conditional<TIs64Bit, uint32_t, T>::type;
+   if (!TIs64Bit) {
+     return contentsOfDword<_32BitType>(Slot);
+   }
+   return contentsOfQword<_64BitType>(Slot);
+ }
+
+ // Returns the byte offset, from the start of the mapped buffer, of dword
+ // Index: dwords are 4 bytes each and start MaximumCodeSize bytes in.
+ static uint32_t dwordOffset(uint32_t Index) {
+   const uint32_t BytesIntoScratchpad = Index * 4;
+   return MaximumCodeSize + BytesIntoScratchpad;
+ }
+
+ void *ExecutableData = nullptr;
+ size_t Size;
+ };
+
+ // assemble creates an AssembledTest with the jitted code. The first time it
+ // is called after a reset() it also appends the epilogue to the jitted code
+ // (which is the reason why this method is not const qualified.)
+ AssembledTest assemble() {
+   if (NeedsEpilogue) {
+     addEpilogue();
+     NeedsEpilogue = false;
+   }
+
+   return AssembledTest(codeBytes(), codeBytesSize(), NumAllocatedDwords);
+ }
+
+ // Reserves the next free dword of the test's scratchpad area, and returns
+ // its id.
+ uint32_t allocateDword() {
+   const uint32_t Dword = NumAllocatedDwords;
+   ++NumAllocatedDwords;
+   return Dword;
+ }
+
+ // Reserves two consecutive dwords of the test's scratchpad area, and returns
+ // the id of the first one.
+ uint32_t allocateQword() {
+   const uint32_t FirstDword = allocateDword();
+   (void)allocateDword();
+   return FirstDword;
+ }
+
+ // Reserves two consecutive qwords (i.e., four dwords) of the test's
+ // scratchpad area, and returns the id of the first dword.
+ uint32_t allocateDqword() {
+   const uint32_t FirstDword = allocateQword();
+   (void)allocateQword();
+   return FirstDword;
+ }
+
+ // Returns the r9-based address operand that jitted code uses for accessing
+ // the scratchpad dword Dword.
+ Address dwordAddress(uint32_t Dword) {
+   const uint32_t Disp = dwordDisp(Dword);
+   return Address(Encoded_GPR_r9(), Disp);
+ }
+
+private:
+ // e??SlotAddress returns an AssemblerX8664::Traits::Address that can be used
+ // by the test cases to encode an address operand for accessing the slot for
+ // the specified register. These are all private for, when jitting the test
+ // code, tests should not tamper with these values. Besides, during the test
+ // execution these slots' contents are undefined and should not be accessed.
+ Address raxSlotAddress() { return dwordAddress(AssembledTest::raxSlot()); }
+ Address rbxSlotAddress() { return dwordAddress(AssembledTest::rbxSlot()); }
+ Address rcxSlotAddress() { return dwordAddress(AssembledTest::rcxSlot()); }
+ Address rdxSlotAddress() { return dwordAddress(AssembledTest::rdxSlot()); }
+ Address rdiSlotAddress() { return dwordAddress(AssembledTest::rdiSlot()); }
+ Address rsiSlotAddress() { return dwordAddress(AssembledTest::rsiSlot()); }
+ Address rbpSlotAddress() { return dwordAddress(AssembledTest::rbpSlot()); }
+ Address rspSlotAddress() { return dwordAddress(AssembledTest::rspSlot()); }
+ Address r8SlotAddress() { return dwordAddress(AssembledTest::r8Slot()); }
+ Address r9SlotAddress() { return dwordAddress(AssembledTest::r9Slot()); }
+ Address r10SlotAddress() { return dwordAddress(AssembledTest::r10Slot()); }
+ Address r11SlotAddress() { return dwordAddress(AssembledTest::r11Slot()); }
+ Address r12SlotAddress() { return dwordAddress(AssembledTest::r12Slot()); }
+ Address r13SlotAddress() { return dwordAddress(AssembledTest::r13Slot()); }
+ Address r14SlotAddress() { return dwordAddress(AssembledTest::r14Slot()); }
+ Address r15SlotAddress() { return dwordAddress(AssembledTest::r15Slot()); }
+ Address xmm0SlotAddress() { return dwordAddress(AssembledTest::xmm0Slot()); }
+ Address xmm1SlotAddress() { return dwordAddress(AssembledTest::xmm1Slot()); }
+ Address xmm2SlotAddress() { return dwordAddress(AssembledTest::xmm2Slot()); }
+ Address xmm3SlotAddress() { return dwordAddress(AssembledTest::xmm3Slot()); }
+ Address xmm4SlotAddress() { return dwordAddress(AssembledTest::xmm4Slot()); }
+ Address xmm5SlotAddress() { return dwordAddress(AssembledTest::xmm5Slot()); }
+ Address xmm6SlotAddress() { return dwordAddress(AssembledTest::xmm6Slot()); }
+ Address xmm7SlotAddress() { return dwordAddress(AssembledTest::xmm7Slot()); }
+ Address xmm8SlotAddress() { return dwordAddress(AssembledTest::xmm8Slot()); }
+ Address xmm9SlotAddress() { return dwordAddress(AssembledTest::xmm9Slot()); }
+ Address xmm10SlotAddress() {
+ return dwordAddress(AssembledTest::xmm10Slot());
+ }
+ Address xmm11SlotAddress() {
+ return dwordAddress(AssembledTest::xmm11Slot());
+ }
+ Address xmm12SlotAddress() {
+ return dwordAddress(AssembledTest::xmm12Slot());
+ }
+ Address xmm13SlotAddress() {
+ return dwordAddress(AssembledTest::xmm13Slot());
+ }
+ Address xmm14SlotAddress() {
+ return dwordAddress(AssembledTest::xmm14Slot());
+ }
+ Address xmm15SlotAddress() {
+ return dwordAddress(AssembledTest::xmm15Slot());
+ }
+
+ // Returns the displacement that should be used when accessing the specified
+ // Dword in the scratchpad area. The dword's offset from the start of the
+ // buffer is adjusted for the initial instructions (push r9 and the call
+ // itself) that are emitted before the call that materializes the IP
+ // register.
+ uint32_t dwordDisp(uint32_t Dword) const {
+   EXPECT_LT(Dword, NumAllocatedDwords);
+   assert(Dword < NumAllocatedDwords);
+
+   static constexpr uint8_t PushR9Bytes = 2;
+   static constexpr uint8_t CallImmBytes = 5;
+   const uint32_t OffsetInBuffer = AssembledTest::MaximumCodeSize + (Dword * 4);
+   return OffsetInBuffer - (PushR9Bytes + CallImmBytes);
+ }
+
+ // Emits the code that runs before every test: r9 is saved and then loaded,
+ // by means of a call followed by a pop, with the address that dwordDisp()'s
+ // displacements are relative to; every other GPR that tests may use is
+ // saved, and then zeroed with a 32-bit mov.
+ void addPrologue() {
+   __ pushl(Encoded_GPR_r9());
+   __ call(Immediate(4));
+   __ popl(Encoded_GPR_r9());
+
+   // rsp and r9 are deliberately not in this list.
+   const decltype(Encoded_GPR_rax()) TestRegs[] = {
+       Encoded_GPR_rax(), Encoded_GPR_rbx(), Encoded_GPR_rcx(),
+       Encoded_GPR_rdx(), Encoded_GPR_rbp(), Encoded_GPR_rdi(),
+       Encoded_GPR_rsi(), Encoded_GPR_r8(),  Encoded_GPR_r10(),
+       Encoded_GPR_r11(), Encoded_GPR_r12(), Encoded_GPR_r13(),
+       Encoded_GPR_r14(), Encoded_GPR_r15()};
+
+   for (const auto Reg : TestRegs) {
+     __ pushl(Reg);
+   }
+
+   for (const auto Reg : TestRegs) {
+     __ mov(IceType_i32, Reg, Immediate(0x00));
+   }
+ }
+
+ void addEpilogue() {
+ __ mov(IceType_i64, raxSlotAddress(), Encoded_GPR_rax());
+ __ mov(IceType_i64, rbxSlotAddress(), Encoded_GPR_rbx());
+ __ mov(IceType_i64, rcxSlotAddress(), Encoded_GPR_rcx());
+ __ mov(IceType_i64, rdxSlotAddress(), Encoded_GPR_rdx());
+ __ mov(IceType_i64, rdiSlotAddress(), Encoded_GPR_rdi());
+ __ mov(IceType_i64, rsiSlotAddress(), Encoded_GPR_rsi());
+ __ mov(IceType_i64, rbpSlotAddress(), Encoded_GPR_rbp());
+ __ mov(IceType_i64, rspSlotAddress(), Encoded_GPR_rsp());
+ __ mov(IceType_i64, r8SlotAddress(), Encoded_GPR_r8());
+ __ mov(IceType_i64, r9SlotAddress(), Encoded_GPR_r9());
+ __ mov(IceType_i64, r10SlotAddress(), Encoded_GPR_r10());
+ __ mov(IceType_i64, r11SlotAddress(), Encoded_GPR_r11());
+ __ mov(IceType_i64, r12SlotAddress(), Encoded_GPR_r12());
+ __ mov(IceType_i64, r13SlotAddress(), Encoded_GPR_r13());
+ __ mov(IceType_i64, r14SlotAddress(), Encoded_GPR_r14());
+ __ mov(IceType_i64, r15SlotAddress(), Encoded_GPR_r15());
+ __ movups(xmm0SlotAddress(), Encoded_Xmm_xmm0());
+ __ movups(xmm1SlotAddress(), Encoded_Xmm_xmm1());
+ __ movups(xmm2SlotAddress(), Encoded_Xmm_xmm2());
+ __ movups(xmm3SlotAddress(), Encoded_Xmm_xmm3());
+ __ movups(xmm4SlotAddress(), Encoded_Xmm_xmm4());
+ __ movups(xmm5SlotAddress(), Encoded_Xmm_xmm5());
+ __ movups(xmm6SlotAddress(), Encoded_Xmm_xmm6());
+ __ movups(xmm7SlotAddress(), Encoded_Xmm_xmm7());
+ __ movups(xmm8SlotAddress(), Encoded_Xmm_xmm8());
+ __ movups(xmm9SlotAddress(), Encoded_Xmm_xmm9());
+ __ movups(xmm10SlotAddress(), Encoded_Xmm_xmm10());
+ __ movups(xmm11SlotAddress(), Encoded_Xmm_xmm11());
+ __ movups(xmm12SlotAddress(), Encoded_Xmm_xmm12());
+ __ movups(xmm13SlotAddress(), Encoded_Xmm_xmm13());
+ __ movups(xmm14SlotAddress(), Encoded_Xmm_xmm14());
+ __ movups(xmm15SlotAddress(), Encoded_Xmm_xmm15());
+
+ __ popl(Encoded_GPR_r15());
+ __ popl(Encoded_GPR_r14());
+ __ popl(Encoded_GPR_r13());
+ __ popl(Encoded_GPR_r12());
+ __ popl(Encoded_GPR_r11());
+ __ popl(Encoded_GPR_r10());
+ __ popl(Encoded_GPR_r8());
+ __ popl(Encoded_GPR_rsi());
+ __ popl(Encoded_GPR_rdi());
+ __ popl(Encoded_GPR_rbp());
+ __ popl(Encoded_GPR_rdx());
+ __ popl(Encoded_GPR_rcx());
+ __ popl(Encoded_GPR_rbx());
+ __ popl(Encoded_GPR_rax());
+ __ popl(Encoded_GPR_r9());
+
+ __ ret();
+ }
+
+ bool NeedsEpilogue;
+ uint32_t NumAllocatedDwords;
+};
+
+// Checks the scratchpad accessors against each other. No jitted code is run:
+// the buffer is only written to, and read from, by the test itself.
+TEST_F(AssemblerX8664Test, ScratchpadGettersAndSetters) {
+  // Four consecutive dwords, so that the wider accesses span several of them.
+  const uint32_t Slot0 = allocateDword();
+  const uint32_t Slot1 = allocateDword();
+  const uint32_t Slot2 = allocateDword();
+  const uint32_t Slot3 = allocateDword();
+  AssembledTest test = assemble();
+
+  // Dwords written one at a time can be read back as dwords and as qwords.
+  test.setDwordTo(Slot0, 0xBEEF0000u);
+  test.setDwordTo(Slot1, 0xDEADu);
+  test.setDwordTo(Slot2, 0x20406080u);
+  ASSERT_EQ(0xBEEF0000u, test.contentsOfDword(Slot0));
+  ASSERT_EQ(0xDEADu, test.contentsOfDword(Slot1));
+  ASSERT_EQ(0x20406080u, test.contentsOfDword(Slot2));
+  ASSERT_EQ(0xDEADBEEF0000ull, test.contentsOfQword(Slot0));
+  ASSERT_EQ(0x204060800000DEADull, test.contentsOfQword(Slot1));
+
+  // A qword write changes two dwords, and leaves its neighbors alone.
+  test.setQwordTo(Slot1, 0x1234567890ABCDEFull);
+  ASSERT_EQ(0x1234567890ABCDEFull, test.contentsOfQword(Slot1));
+  test.setDwordTo(Slot0, 0xBEEF0000u);
+  ASSERT_EQ(0x90ABCDEFull, test.contentsOfDword(Slot1));
+  ASSERT_EQ(0x12345678ull, test.contentsOfDword(Slot2));
+
+  // Floating point values.
+  test.setDwordTo(Slot0, 1.0f);
+  ASSERT_FLOAT_EQ(1.0f, test.contentsOfDword<float>(Slot0));
+  test.setQwordTo(Slot0, 3.14);
+  ASSERT_DOUBLE_EQ(3.14, test.contentsOfQword<double>(Slot0));
+
+  // A dqword covers all four dwords.
+  test.setDqwordTo(Slot0, Dqword(1.0f, 2.0f, 3.0f, 4.0f));
+  ASSERT_EQ(Dqword(1.0f, 2.0f, 3.0f, 4.0f), test.contentsOfDqword(Slot0));
+  EXPECT_FLOAT_EQ(1.0f, test.contentsOfDword<float>(Slot0));
+  EXPECT_FLOAT_EQ(2.0f, test.contentsOfDword<float>(Slot1));
+  EXPECT_FLOAT_EQ(3.0f, test.contentsOfDword<float>(Slot2));
+  EXPECT_FLOAT_EQ(4.0f, test.contentsOfDword<float>(Slot3));
+}
+
+// Checks mov reg, imm for 8-, 16-, and 32-bit destinations. The destination
+// is first loaded with a 32-bit marker, so the test can verify that the bits
+// outside of the destination's size keep the marker's value.
+TEST_F(AssemblerX8664Test, MovRegImm) {
+  static constexpr uint32_t Mask8 = 0x000000FF;
+  static constexpr uint32_t Mask16 = 0x0000FFFF;
+  static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+#define MovRegImm(Reg, Suffix, Size) \
+  do { \
+    static constexpr char TestString[] = "(" #Reg ", " #Size ")"; \
+    static constexpr uint32_t Marker = 0xBEEFFEEB; \
+    static constexpr uint32_t Value = (0xABCD7645) & Mask##Size; \
+    static constexpr uint32_t Merged = (Marker & ~Mask##Size) | Value; \
+ \
+    __ mov(IceType_i32, Encoded_GPR_##Reg##q(), Immediate(Marker)); \
+    __ mov(IceType_i##Size, Encoded_GPR_##Reg##Suffix(), Immediate(Value)); \
+ \
+    AssembledTest test = assemble(); \
+    test.run(); \
+ \
+    ASSERT_EQ(Value, test.Reg##Suffix()) << TestString; \
+    ASSERT_EQ(Merged, test.Reg##d()) << TestString; \
+    reset(); \
+  } while (0)
+
+#define TestImpl(Reg) \
+  do { \
+    MovRegImm(Reg, l, 8); \
+    MovRegImm(Reg, w, 16); \
+    MovRegImm(Reg, d, 32); \
+    /* MovRegImm64 not implemented */ \
+  } while (0)
+
+  // r9 is not tested: it holds the scratchpad's base address.
+  TestImpl(r1);
+  TestImpl(r2);
+  TestImpl(r3);
+  TestImpl(r4);
+  TestImpl(r5);
+  TestImpl(r6);
+  TestImpl(r7);
+  TestImpl(r8);
+  TestImpl(r10);
+  TestImpl(r11);
+  TestImpl(r12);
+  TestImpl(r13);
+  TestImpl(r14);
+  TestImpl(r15);
+
+#undef TestImpl
+#undef MovRegImm
+}
+
+// Checks mov mem, imm for 32-, 16-, and 8-bit destinations.
+//
+// Every destination dword is filled with Marker before the jitted code runs,
+// and the narrow stores are expected to leave the marker's upper bytes alone.
+// Without the marker the 8-bit check was vacuous: the low byte of ExpectedT2
+// is zero, which is also what the untouched scratchpad contains, so the check
+// passed even if no byte was stored.
+TEST_F(AssemblerX8664Test, MovMemImm) {
+  constexpr uint32_t Marker = 0xA5A5A5A5ul;
+  const uint32_t T0 = allocateDword();
+  constexpr uint32_t ExpectedT0 = 0x00111100ul;
+  const uint32_t T1 = allocateDword();
+  constexpr uint32_t ExpectedT1 = 0x00222200ul;
+  const uint32_t T2 = allocateDword();
+  constexpr uint32_t ExpectedT2 = 0x03333000ul;
+  const uint32_t T3 = allocateDword();
+  constexpr uint32_t ExpectedT3 = 0x00444400ul;
+
+  __ mov(IceType_i32, dwordAddress(T0), Immediate(ExpectedT0));
+  __ mov(IceType_i16, dwordAddress(T1), Immediate(ExpectedT1));
+  __ mov(IceType_i8, dwordAddress(T2), Immediate(ExpectedT2));
+  __ mov(IceType_i32, dwordAddress(T3), Immediate(ExpectedT3));
+
+  AssembledTest test = assemble();
+  test.setDwordTo(T0, Marker);
+  test.setDwordTo(T1, Marker);
+  test.setDwordTo(T2, Marker);
+  test.setDwordTo(T3, Marker);
+  test.run();
+  EXPECT_EQ(0ul, test.eax());
+  EXPECT_EQ(0ul, test.ebx());
+  EXPECT_EQ(0ul, test.ecx());
+  EXPECT_EQ(0ul, test.edx());
+  EXPECT_EQ(0ul, test.edi());
+  EXPECT_EQ(0ul, test.esi());
+  EXPECT_EQ(ExpectedT0, test.contentsOfDword(T0));
+  // The 16-bit store only replaces the two low bytes.
+  EXPECT_EQ((Marker & 0xFFFF0000u) | (ExpectedT1 & 0xFFFF),
+            test.contentsOfDword(T1));
+  // The 8-bit store only replaces the low byte.
+  EXPECT_EQ((Marker & 0xFFFFFF00u) | (ExpectedT2 & 0xFF),
+            test.contentsOfDword(T2));
+  EXPECT_EQ(ExpectedT3, test.contentsOfDword(T3));
+}
+
+// Checks mov mem, reg for 8-, 16-, 32-, and 64-bit stores. The destination
+// qword is filled with a marker before the jitted code runs, so the test can
+// verify that the bytes outside of the store's size keep the marker's value.
+TEST_F(AssemblerX8664Test, MovMemReg) {
+  static constexpr uint64_t Mask8 = 0x00000000000000FF;
+  static constexpr uint64_t Mask16 = 0x000000000000FFFF;
+  static constexpr uint64_t Mask32 = 0x00000000FFFFFFFF;
+  static constexpr uint64_t Mask64 = 0xFFFFFFFFFFFFFFFF;
+
+#define TestMemReg(Src, Size) \
+  do { \
+    static constexpr char TestString[] = "(" #Src ", " #Size ")"; \
+    static constexpr uint64_t Marker = 0xD0DA33EEBEEFFEEB; \
+    static constexpr uint32_t Value = 0x1a4d567e & Mask##Size; \
+    const uint32_t Dest = allocateQword(); \
+ \
+    __ mov(IceType_i32, Encoded_GPR_##Src(), Immediate(Value)); \
+    __ mov(IceType_i##Size, dwordAddress(Dest), Encoded_GPR_##Src()); \
+ \
+    AssembledTest test = assemble(); \
+    test.setQwordTo(Dest, Marker); \
+    test.run(); \
+ \
+    ASSERT_EQ((Marker & ~Mask##Size) | Value, test.contentsOfQword(Dest)) \
+        << TestString; \
+    reset(); \
+  } while (0)
+
+#define TestImpl(Src) \
+  do { \
+    TestMemReg(Src, 8); \
+    TestMemReg(Src, 16); \
+    TestMemReg(Src, 32); \
+    TestMemReg(Src, 64); \
+  } while (0)
+
+  // r9 is not tested: it holds the scratchpad's base address.
+  TestImpl(r1);
+  TestImpl(r2);
+  TestImpl(r3);
+  TestImpl(r4);
+  TestImpl(r5);
+  TestImpl(r6);
+  TestImpl(r7);
+  TestImpl(r8);
+  TestImpl(r10);
+  TestImpl(r11);
+  TestImpl(r12);
+  TestImpl(r13);
+  TestImpl(r14);
+  TestImpl(r15);
+
+#undef TestImpl
+#undef TestMemReg
+}
+
+TEST_F(AssemblerX8664Test, MovRegReg) {
+ // Register-to-register mov at 8/16/32/64-bit operand sizes.
+ //
+ // MaskN selects the low N bits of the source value that is moved.
+ static constexpr uint64_t Mask8 = 0x00000000000000FFull;
+ static constexpr uint64_t Mask16 = 0x000000000000FFFFull;
+ static constexpr uint64_t Mask32 = 0x00000000FFFFFFFFull;
+ static constexpr uint64_t Mask64 = 0xFFFFFFFFFFFFFFFFull;
+
+ // MaskResultN marks the destination bits that are NOT expected to keep the
+ // marker after an N-bit mov. It is all ones for 32 as well as 64: the
+ // expectation below is that a 32-bit mov leaves nothing of the marker.
+ static constexpr uint64_t MaskResult8 = 0x00000000000000FFull;
+ static constexpr uint64_t MaskResult16 = 0x000000000000FFFFull;
+ static constexpr uint64_t MaskResult32 = 0xFFFFFFFFFFFFFFFFull;
+ static constexpr uint64_t MaskResult64 = 0xFFFFFFFFFFFFFFFFull;
+
+ // One case: Src is loaded with Value and Dst with Marker (both from
+ // memory, as 64-bit loads), then a Size-bit mov Dst, Src is executed. The
+ // full Dst register and its Suffix-sized view are both checked.
+ // T0/T1 are uint32_t, matching the other tests in this file, so that the
+ // offset returned by allocateQword() is never narrowed.
+#define TestRegReg(Dst, Src, Suffix, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Src ", " #Suffix ", " #Size ")"; \
+ const uint32_t T0 = allocateQword(); \
+ static constexpr uint64_t Value = 0xA4DD30Af86CCE321ull & Mask##Size; \
+ const uint32_t T1 = allocateQword(); \
+ static constexpr uint64_t Marker = 0xC0FFEEA0BEEFFEEFull; \
+ \
+ __ mov(IceType_i64, Encoded_GPR_##Src(), dwordAddress(T0)); \
+ __ mov(IceType_i64, Encoded_GPR_##Dst(), dwordAddress(T1)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setQwordTo(T0, Value); \
+ test.setQwordTo(T1, Marker); \
+ test.run(); \
+ \
+ ASSERT_EQ((Marker & ~MaskResult##Size) | Value, test.Dst()) << TestString; \
+ ASSERT_EQ(Value, test.Dst##Suffix()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Runs TestRegReg on one (Dst, Src) pair for all four operand sizes.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestRegReg(Dst, Src, l, 8); \
+ TestRegReg(Dst, Src, w, 16); \
+ TestRegReg(Dst, Src, d, 32); \
+ TestRegReg(Dst, Src, q, 64); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+#undef TestRegReg
+}
+
+TEST_F(AssemblerX8664Test, MovRegMem) {
+ // Memory-to-register mov at 8/16/32/64-bit operand sizes.
+ //
+ // MaskN selects the low N bits of the value that is loaded.
+ static constexpr uint64_t Mask8 = 0x00000000000000FFull;
+ static constexpr uint64_t Mask16 = 0x000000000000FFFFull;
+ static constexpr uint64_t Mask32 = 0x00000000FFFFFFFFull;
+ static constexpr uint64_t Mask64 = 0xFFFFFFFFFFFFFFFFull;
+
+ // MaskResultN selects the destination bits that ARE expected to keep the
+ // marker after an N-bit load (note: already inverted, unlike the masks
+ // above). It is zero for 32 as well as 64: the expectation below is that a
+ // 32-bit load leaves nothing of the marker.
+ static constexpr uint64_t MaskResult8 = ~0x00000000000000FFull;
+ static constexpr uint64_t MaskResult16 = ~0x000000000000FFFFull;
+ static constexpr uint64_t MaskResult32 = ~0xFFFFFFFFFFFFFFFFull;
+ static constexpr uint64_t MaskResult64 = ~0xFFFFFFFFFFFFFFFFull;
+
+ // One case: Dst is first filled with Marker by a 64-bit load from T1, then
+ // a Size-bit load from T0 (holding Value) is executed. The full Dst
+ // register and its Suffix-sized view are both checked.
+ // T0/T1 are uint32_t, matching the other tests in this file, so that the
+ // offset returned by allocateQword() is never narrowed.
+#define TestRegAddr(Dst, Suffix, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", Addr, " #Suffix ", " #Size ")"; \
+ const uint32_t T0 = allocateQword(); \
+ static constexpr uint64_t Value = 0xA4DD30Af86CCE321ull & Mask##Size; \
+ const uint32_t T1 = allocateQword(); \
+ static constexpr uint64_t Marker = 0xC0FFEEA0BEEFFEEFull; \
+ \
+ __ mov(IceType_i64, Encoded_GPR_##Dst(), dwordAddress(T1)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setQwordTo(T0, Value); \
+ test.setQwordTo(T1, Marker); \
+ test.run(); \
+ \
+ ASSERT_EQ((Marker & MaskResult##Size) | Value, test.Dst()) << TestString; \
+ ASSERT_EQ(Value, test.Dst##Suffix()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Runs TestRegAddr on one destination register for all four operand sizes.
+#define TestImpl(Dst) \
+ do { \
+ TestRegAddr(Dst, l, 8); \
+ TestRegAddr(Dst, w, 16); \
+ TestRegAddr(Dst, d, 32); \
+ TestRegAddr(Dst, q, 64); \
+ } while (0)
+
+ TestImpl(r1);
+ TestImpl(r2);
+ TestImpl(r3);
+ TestImpl(r4);
+ TestImpl(r5);
+ TestImpl(r6);
+ TestImpl(r7);
+ TestImpl(r8);
+ TestImpl(r10);
+ TestImpl(r11);
+ TestImpl(r12);
+ TestImpl(r13);
+ TestImpl(r14);
+ TestImpl(r15);
+
+#undef TestImpl
+#undef TestRegAddr
+}
+
+TEST_F(AssemblerX8664Test, J) { // Conditional jumps: each listed condition, in both Near and Far forms.
+#define TestJ(C, Near, Dest, Src0, Value0, Src1, Value1) /* Near is the token Near or Far; it is pasted into AssemblerX8664::k<Near>Jump. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #C ", " #Near ", " #Dest ", " #Src0 ", " #Value0 ", " #Src1 \
+ ", " #Value1 ")"; \
+ const bool NearJmp = AssemblerX8664::k##Near##Jump; \
+ Label ShouldBeTaken; \
+ __ mov(IceType_i32, Encoded_GPR_##Src0(), Immediate(Value0)); \
+ __ mov(IceType_i32, Encoded_GPR_##Src1(), Immediate(Value1)); \
+ __ mov(IceType_i32, Encoded_GPR_##Dest(), Immediate(0xBEEF)); /* sentinel that must survive the run */ \
+ __ cmp(IceType_i32, Encoded_GPR_##Src0(), Encoded_GPR_##Src1()); \
+ __ j(Cond::Br_##C, &ShouldBeTaken, NearJmp); \
+ __ mov(IceType_i32, Encoded_GPR_##Dest(), Immediate(0xC0FFEE)); /* reached only when the jump is not taken */ \
+ __ bind(&ShouldBeTaken); \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ ASSERT_EQ(Value0, test.Src0()) << TestString; \
+ ASSERT_EQ(Value1, test.Src1()) << TestString; \
+ ASSERT_EQ(0xBEEFul, test.Dest()) << TestString; /* i.e. the branch skipped the overwrite */ \
+ reset(); \
+ } while (0)
+
+#define TestImpl(Dst, Src0, Src1) /* TestJ always asserts the jump was taken, so each row's operands must satisfy its condition. */ \
+ do { \
+ TestJ(o, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(o, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(no, Near, Dst, Src0, 0x1ul, Src1, 0x1ul); \
+ TestJ(no, Far, Dst, Src0, 0x1ul, Src1, 0x1ul); \
+ TestJ(b, Near, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(b, Far, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(ae, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(ae, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(e, Near, Dst, Src0, 0x80000000ul, Src1, 0x80000000ul); \
+ TestJ(e, Far, Dst, Src0, 0x80000000ul, Src1, 0x80000000ul); \
+ TestJ(ne, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(ne, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(be, Near, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(be, Far, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(a, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(a, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(s, Near, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(s, Far, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(ns, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(ns, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(p, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(p, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(np, Near, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(np, Far, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(l, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(l, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(ge, Near, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(ge, Far, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(le, Near, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(le, Far, Dst, Src0, 0x80000000ul, Src1, 0x1ul); \
+ TestJ(g, Near, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ TestJ(g, Far, Dst, Src0, 0x1ul, Src1, 0x80000000ul); \
+ } while (0)
+
+ TestImpl(r1, r2, r3); // Rotate (Dst, Src0, Src1) through the register set; r0 and r9 are not used here.
+ TestImpl(r2, r3, r4);
+ TestImpl(r3, r4, r5);
+ TestImpl(r4, r5, r6);
+ TestImpl(r5, r6, r7);
+ TestImpl(r6, r7, r8);
+ TestImpl(r7, r8, r10);
+ TestImpl(r8, r10, r11);
+ TestImpl(r10, r11, r12);
+ TestImpl(r11, r12, r13);
+ TestImpl(r12, r13, r14);
+ TestImpl(r13, r14, r15);
+ TestImpl(r14, r15, r1);
+ TestImpl(r15, r1, r2);
+
+#undef TestImpl
+#undef TestJ
+}
+
+TEST_F(AssemblerX8664Test, PopAddr) {
+ // popl with a memory destination: push a known value held in eax, pop it
+ // straight into a qword slot, and read the slot back.
+ constexpr uint64_t V0 = 0x3AABBEFABBBAA3ull;
+ const uint32_t Slot = allocateQword();
+ const auto Scratch = GPRRegister::Encoded_Reg_eax;
+
+ __ mov(IceType_i32, Scratch, Immediate(0xC0FFEE));
+ __ pushl(Scratch);
+ __ popl(dwordAddress(Slot));
+
+ AssembledTest test = assemble();
+ // The slot starts out holding a stale value with bits set above bit 31;
+ // the assertion below expects the whole qword to have been replaced.
+ test.setQwordTo(Slot, V0);
+ test.run();
+
+ ASSERT_EQ(0xC0FFEEul, test.contentsOfQword(Slot));
+}
+
+TEST_F(AssemblerX8664Test, SetCC) { // setcc into a byte register and into memory, for true and false outcomes.
+#define TestSetCC(C, Dest, IsTrue, Src0, Value0, Src1, Value1) /* IsTrue (1u or 0u) is the result setcc is expected to produce. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #C ", " #Dest ", " #IsTrue ", " #Src0 ", " #Value0 ", " #Src1 \
+ ", " #Value1 ")"; \
+ const uint32_t T0 = allocateDword(); \
+ constexpr uint32_t V0 = 0xF00F00; /* initial memory contents; its low byte is zero */ \
+ __ mov(IceType_i32, Encoded_GPR_##Src0(), Immediate(Value0)); \
+ __ mov(IceType_i32, Encoded_GPR_##Src1(), Immediate(Value1)); \
+ __ cmp(IceType_i32, Encoded_GPR_##Src0(), Encoded_GPR_##Src1()); \
+ __ mov(IceType_i32, Encoded_GPR_##Dest(), Immediate(0)); /* clear Dest between cmp and setcc; relies on mov not changing the flags */ \
+ __ setcc(Cond::Br_##C, RegX8664::getEncodedByteReg(Encoded_GPR_##Dest())); /* register form */ \
+ __ setcc(Cond::Br_##C, dwordAddress(T0)); /* memory form */ \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ \
+ test.run(); \
+ \
+ ASSERT_EQ(IsTrue, test.Dest()) << TestString; \
+ ASSERT_EQ((0xF00F00 | IsTrue), test.contentsOfDword(T0)) << TestString; /* 0xF00F00 is V0: the upper bytes must be unchanged */ \
+ reset(); \
+ } while (0)
+
+#define TestImpl(Dest, Src0, Src1) /* One true and one false row per condition. NOTE(review): there are no rows for condition g. */ \
+ do { \
+ TestSetCC(o, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(o, Dest, 0u, Src0, 0x1u, Src1, 0x10000000u); \
+ TestSetCC(no, Dest, 1u, Src0, 0x1u, Src1, 0x10000000u); \
+ TestSetCC(no, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(b, Dest, 1u, Src0, 0x1, Src1, 0x80000000u); \
+ TestSetCC(b, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(ae, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(ae, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(e, Dest, 1u, Src0, 0x1u, Src1, 0x1u); \
+ TestSetCC(e, Dest, 0u, Src0, 0x1u, Src1, 0x11111u); \
+ TestSetCC(ne, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(ne, Dest, 0u, Src0, 0x1u, Src1, 0x1u); \
+ TestSetCC(be, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(be, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(a, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(a, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(s, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(s, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(ns, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(ns, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(p, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(p, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(np, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(np, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(l, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(l, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(ge, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestSetCC(ge, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(le, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestSetCC(le, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ } while (0)
+
+ TestImpl(r1, r2, r3); // Rotate (Dest, Src0, Src1) through the register set; r0 and r9 are not used here.
+ TestImpl(r2, r3, r4);
+ TestImpl(r3, r4, r5);
+ TestImpl(r4, r5, r6);
+ TestImpl(r5, r6, r7);
+ TestImpl(r6, r7, r8);
+ TestImpl(r7, r8, r10);
+ TestImpl(r8, r10, r11);
+ TestImpl(r10, r11, r12);
+ TestImpl(r11, r12, r13);
+ TestImpl(r12, r13, r14);
+ TestImpl(r13, r14, r15);
+ TestImpl(r14, r15, r1);
+ TestImpl(r15, r1, r2);
+
+#undef TestImpl
+#undef TestSetCC
+}
+
+TEST_F(AssemblerX8664Test, CallImm) {
+ // call with an immediate target. The call is followed by a run of hlt
+ // instructions and then by the code that sets eax; the test only passes if
+ // eax ends up holding 0xF00F, so the call is expected to land past the
+ // padding (presumably directly on the mov -- confirm against the encoding
+ // of call(Immediate)).
+ static constexpr int NumPaddingHlts = 12;
+
+ __ call(Immediate(16));
+ for (int I = 0; I < NumPaddingHlts; ++I) {
+ __ hlt();
+ }
+ __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(0xf00f));
+ // Pops one stack slot into ebx (presumably the return address that the
+ // call pushed); ebx itself is never inspected.
+ __ popl(GPRRegister::Encoded_Reg_ebx);
+
+ AssembledTest test = assemble();
+ test.run();
+
+ EXPECT_EQ(0xF00Fu, test.eax());
+}
+
+TEST_F(AssemblerX8664Test, CallReg) { // Indirect call through a register.
+#define TestImpl(Dst, Src) /* Src receives the address that is called; Dst receives the return address pushed by that indirect call. */ \
+ do { \
+ __ call(Immediate(16)); /* first call: skips ahead and pushes the address of the pop below */ \
+ int CallTargetAddr = codeBytesSize() + 12; /* 12 bytes past the call; presumably where call(Immediate(16)) lands -- TODO confirm */ \
+ __ popl(Encoded_GPR_##Dst()); /* callee of the indirect call: copy its return address into Dst ... */ \
+ __ pushl(Encoded_GPR_##Dst()); /* ... and put it back so ret still works */ \
+ __ ret(); \
+ for (int I = codeBytesSize(); I < CallTargetAddr; ++I) { /* pad with hlt up to CallTargetAddr */ \
+ __ hlt(); \
+ } \
+ __ popl(Encoded_GPR_##Src()); /* Src := return address pushed by the first call */ \
+ __ call(Encoded_GPR_##Src()); /* the instruction under test */ \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.run(); \
+ \
+ ASSERT_LE(15u, test.Dst() - test.Src()) << "(" #Dst ", " #Src ")"; /* the two captured addresses must be at least 15 bytes apart */ \
+ reset(); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+}
+
+TEST_F(AssemblerX8664Test, CallAddr) { // Indirect call through a memory operand.
+#define TestImpl(Dst, Src) /* Dst records that the callee ran; Src holds the address of the memory operand. */ \
+ do { \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = 0xA0C0FFEEBEEFFEEFull; \
+ __ call(Immediate(16)); /* first call: skips ahead and pushes the address of the mov below */ \
+ int CallTargetAddr = codeBytesSize() + 12; /* 12 bytes past the call; presumably where call(Immediate(16)) lands -- TODO confirm */ \
+ __ mov(IceType_i8, Encoded_GPR_##Dst##l(), Immediate(0xf4)); /* callee body: stamp the low byte of Dst */ \
+ __ ret(); \
+ for (int I = codeBytesSize(); I < CallTargetAddr; ++I) { /* pad with hlt up to CallTargetAddr */ \
+ __ hlt(); \
+ } \
+ __ mov(IceType_i64, Encoded_GPR_##Dst##q(), dwordAddress(T0)); /* Dst := V0 */ \
+ __ mov(IceType_i64, Encoded_GPR_##Src##q(), Encoded_GPR_rsp()); /* Src := stack pointer, i.e. the address of the top stack slot */ \
+ __ call(Address(Encoded_GPR_##Src##q(), 0)); /* the instruction under test: call through the qword at [Src] */ \
+ __ popl(Encoded_GPR_##Src##q()); /* pop one stack slot (presumably the first call's return address) */ \
+ \
+ AssembledTest test = assemble(); \
+ test.setQwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(0xA0C0FFEEBEEFFEF4ull, test.Dst##q()) << "(" #Dst ", " #Src ")"; /* V0 with its low byte replaced by 0xf4 */ \
+ reset(); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+}
+
+TEST_F(AssemblerX8664Test, Movzx) { // Zero-extending moves from 8- and 16-bit sources (register and memory).
+ static constexpr uint32_t Mask8 = 0x000000FF; // MaskN covers the low N bits.
+ static constexpr uint32_t Mask16 = 0x0000FFFF;
+
+#define TestImplRegReg(Dst, Src, Suffix, Size) /* Register source: Suffix names the Size-bit view of Src. */ \
+ do { \
+ const uint32_t T0 = allocateDqword(); \
+ static constexpr uint64_t V0 = 0xAAAAAAAAAAAAAAAAull; \
+ static constexpr uint32_t Value = (0xBEEF) & Mask##Size; \
+ __ mov(IceType_i64, Encoded_GPR_##Dst##q(), dwordAddress(T0)); /* pre-fill all 64 bits of Dst with V0 */ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src##Suffix(), Immediate(Value)); \
+ __ movzx(IceType_i##Size, Encoded_GPR_##Dst##d(), /* the instruction under test */ \
+ Encoded_GPR_##Src##Suffix()); \
+ AssembledTest test = assemble(); \
+ test.setQwordTo(T0, V0); \
+ test.run(); \
+ ASSERT_EQ(Value, test.Dst##q()) << "(" #Dst ", " #Src ", " #Size ")"; /* every Dst bit above Size must be zero */ \
+ reset(); \
+ } while (0)
+
+#define TestImplRegAddr(Dst, Suffix, Size) /* Memory source. Suffix is accepted but not used in the body. */ \
+ do { \
+ const uint32_t T0 = allocateDqword(); \
+ static constexpr uint64_t V0 = 0xAAAAAAAAAAAAAAAAull; \
+ static constexpr uint32_t Value = (0xBEEF) & Mask##Size; \
+ __ movzx(IceType_i##Size, Encoded_GPR_##Dst##d(), dwordAddress(T0)); /* the instruction under test */ \
+ \
+ AssembledTest test = assemble(); \
+ test.setQwordTo(T0, (V0 & ~Mask##Size) | Value); /* non-zero bits above Size in memory must not reach Dst */ \
+ test.run(); \
+ ASSERT_EQ(Value, test.Dst##q()) << "(" #Dst ", Addr, " #Size ")"; \
+ reset(); \
+ } while (0)
+
+#define TestImpl(Dst, Src) /* Both source kinds at both source widths. */ \
+ do { \
+ TestImplRegReg(Dst, Src, l, 8); \
+ TestImplRegAddr(Dst, l, 8); \
+ TestImplRegReg(Dst, Src, w, 16); \
+ TestImplRegAddr(Dst, w, 16); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+#undef TestImplRegAddr
+#undef TestImplRegReg
+}
+
+TEST_F(AssemblerX8664Test, Movsx) {
+ // Sign-extending moves from 8-, 16- and 32-bit sources (register and
+ // memory). Every source value used below has its top bit set, so the
+ // expected result always has all bits above the source width set.
+ //
+ // MaskN covers the low N bits.
+ static constexpr uint64_t Mask8 = 0x000000FF;
+ static constexpr uint64_t Mask16 = 0x0000FFFF;
+ static constexpr uint64_t Mask32 = 0xFFFFFFFF;
+
+ // Register source. Suffix names the Size-bit view of Src. Dst is pre-filled
+ // with V0 so that stale upper bits would be detected.
+#define TestImplRegReg(Dst, Src, Suffix, Size) \
+ do { \
+ const uint32_t T0 = allocateDqword(); \
+ static constexpr uint64_t V0 = 0xAAAAAAAAAAAAAAAAull; \
+ static constexpr uint64_t Value = (0xC0BEBEEF) & Mask##Size; \
+ __ mov(IceType_i64, Encoded_GPR_##Dst##q(), dwordAddress(T0)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src##Suffix(), Immediate(Value)); \
+ __ movsx(IceType_i##Size, Encoded_GPR_##Dst##d(), \
+ Encoded_GPR_##Src##Suffix()); \
+ AssembledTest test = assemble(); \
+ test.setQwordTo(T0, V0); \
+ test.run(); \
+ ASSERT_EQ((uint64_t(-1) & ~Mask##Size) | Value, test.Dst##q()) \
+ << "(" #Dst ", " #Src ", " #Size ")"; \
+ reset(); \
+ } while (0)
+
+ // Memory source. Suffix is accepted for symmetry but not used in the body.
+#define TestImplRegAddr(Dst, Suffix, Size) \
+ do { \
+ const uint32_t T0 = allocateDqword(); \
+ static constexpr uint64_t V0 = 0xC0BEBEEF & Mask##Size; \
+ static constexpr uint64_t Value = (0xC0BEBEEF) & Mask##Size; \
+ __ movsx(IceType_i##Size, Encoded_GPR_##Dst##d(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setQwordTo(T0, V0); \
+ test.run(); \
+ ASSERT_EQ((uint64_t(-1) & ~Mask##Size) | Value, test.Dst##q()) \
+ << "(" #Dst ", Addr, " #Size ")"; \
+ reset(); \
+ } while (0)
+
+ // Both source kinds at all three source widths. The 32-bit rows use the
+ // `d` register suffix so that the suffix matches the operand size, as the
+ // 8-bit (`l`) and 16-bit (`w`) rows already do.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplRegReg(Dst, Src, l, 8); \
+ TestImplRegAddr(Dst, l, 8); \
+ TestImplRegReg(Dst, Src, w, 16); \
+ TestImplRegAddr(Dst, w, 16); \
+ TestImplRegReg(Dst, Src, d, 32); \
+ TestImplRegAddr(Dst, d, 32); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+#undef TestImplRegAddr
+#undef TestImplRegReg
+}
+
+TEST_F(AssemblerX8664Test, Lea) {
+ // lea with base+disp, index*scale+disp and base+index*scale+disp
+ // addressing. Each macro runs the code and then recomputes the expected
+ // address from the register values observed after the run.
+ //
+ // Base + Disp. Base is loaded with BaseValue unless it is esp or r9, which
+ // are left as the harness set them.
+#define TestLeaBaseDisp(Base, BaseValue, Disp, Dst) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Base ", " #BaseValue ", " #Dst ")"; \
+ if (Encoded_GPR_##Base() != Encoded_GPR_esp() && \
+ Encoded_GPR_##Base() != Encoded_GPR_r9()) { \
+ __ mov(IceType_i32, Encoded_GPR_##Base(), Immediate(BaseValue)); \
+ } \
+ __ lea(IceType_i32, Encoded_GPR_##Dst(), \
+ Address(Encoded_GPR_##Base(), Disp)); \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ ASSERT_EQ(test.Base##d() + (Disp), test.Dst##d()) \
+ << TestString << " with Disp " << Disp; \
+ reset(); \
+ } while (0)
+
+ // Index * {1, 2, 4, 8} + Disp, one destination register per scale. The
+ // Traits::TIMES_n values are used as shift amounts in the expectations.
+ // Index is loaded with IndexValue unless it is r9.
+#define TestLeaIndex32bitDisp(Index, IndexValue, Disp, Dst0, Dst1, Dst2, Dst3) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Index ", " #IndexValue ", " #Dst0 ", " #Dst1 ", " #Dst2 \
+ ", " #Dst3 ")"; \
+ if (Encoded_GPR_##Index() != Encoded_GPR_r9()) { \
+ __ mov(IceType_i32, Encoded_GPR_##Index(), Immediate(IndexValue)); \
+ } \
+ __ lea(IceType_i32, Encoded_GPR_##Dst0(), \
+ Address(Encoded_GPR_##Index(), Traits::TIMES_1, Disp)); \
+ __ lea(IceType_i32, Encoded_GPR_##Dst1(), \
+ Address(Encoded_GPR_##Index(), Traits::TIMES_2, Disp)); \
+ __ lea(IceType_i32, Encoded_GPR_##Dst2(), \
+ Address(Encoded_GPR_##Index(), Traits::TIMES_4, Disp)); \
+ __ lea(IceType_i32, Encoded_GPR_##Dst3(), \
+ Address(Encoded_GPR_##Index(), Traits::TIMES_8, Disp)); \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ ASSERT_EQ((test.Index##d() << Traits::TIMES_1) + (Disp), test.Dst0##d()) \
+ << TestString << " " << Disp; \
+ ASSERT_EQ((test.Index##d() << Traits::TIMES_2) + (Disp), test.Dst1##d()) \
+ << TestString << " " << Disp; \
+ ASSERT_EQ((test.Index##d() << Traits::TIMES_4) + (Disp), test.Dst2##d()) \
+ << TestString << " " << Disp; \
+ ASSERT_EQ((test.Index##d() << Traits::TIMES_8) + (Disp), test.Dst3##d()) \
+ << TestString << " " << Disp; \
+ reset(); \
+ } while (0)
+
+ // Base + Index * {1, 2, 4, 8} + Disp. Base is not overwritten when it is
+ // esp or r9, and Index is not overwritten when it is r9. When Index is esp
+ // the expected index contribution is forced to zero.
+#define TestLeaBaseIndexDisp(Base, BaseValue, Index, IndexValue, Disp, Dst0, \
+ Dst1, Dst2, Dst3) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Base ", " #BaseValue ", " #Index ", " #IndexValue ", " #Dst0 \
+ ", " #Dst1 ", " #Dst2 ", " #Dst3 ")"; \
+ if (Encoded_GPR_##Base() != Encoded_GPR_esp() && \
+ Encoded_GPR_##Base() != Encoded_GPR_r9()) { \
+ __ mov(IceType_i32, Encoded_GPR_##Base(), Immediate(BaseValue)); \
+ } \
+ \
+ if (Encoded_GPR_##Index() != Encoded_GPR_r9()) { \
+ __ mov(IceType_i32, Encoded_GPR_##Index(), Immediate(IndexValue)); \
+ } \
+ \
+ __ lea(IceType_i32, Encoded_GPR_##Dst0(), \
+ Address(Encoded_GPR_##Base(), Encoded_GPR_##Index(), \
+ Traits::TIMES_1, Disp)); \
+ __ lea(IceType_i32, Encoded_GPR_##Dst1(), \
+ Address(Encoded_GPR_##Base(), Encoded_GPR_##Index(), \
+ Traits::TIMES_2, Disp)); \
+ __ lea(IceType_i32, Encoded_GPR_##Dst2(), \
+ Address(Encoded_GPR_##Base(), Encoded_GPR_##Index(), \
+ Traits::TIMES_4, Disp)); \
+ __ lea(IceType_i32, Encoded_GPR_##Dst3(), \
+ Address(Encoded_GPR_##Base(), Encoded_GPR_##Index(), \
+ Traits::TIMES_8, Disp)); \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ uint32_t ExpectedIndexValue = test.Index(); \
+ if (Encoded_GPR_##Index() == Encoded_GPR_esp()) { \
+ ExpectedIndexValue = 0; \
+ } \
+ ASSERT_EQ(test.Base##d() + (ExpectedIndexValue << Traits::TIMES_1) + \
+ (Disp), \
+ test.Dst0##d()) \
+ << TestString << " " << Disp; \
+ ASSERT_EQ(test.Base##d() + (ExpectedIndexValue << Traits::TIMES_2) + \
+ (Disp), \
+ test.Dst1##d()) \
+ << TestString << " " << Disp; \
+ ASSERT_EQ(test.Base##d() + (ExpectedIndexValue << Traits::TIMES_4) + \
+ (Disp), \
+ test.Dst2##d()) \
+ << TestString << " " << Disp; \
+ ASSERT_EQ(test.Base##d() + (ExpectedIndexValue << Traits::TIMES_8) + \
+ (Disp), \
+ test.Dst3##d()) \
+ << TestString << " " << Disp; \
+ reset(); \
+ } while (0)
+
+ // The displacement list covers zero, small positive/negative values and
+ // larger positive/negative values.
+ for (const int32_t Disp :
+ {0x00, 0x06, -0x06, 0x0600, -0x6000, 0x6000000, -0x6000000}) {
+ TestLeaBaseDisp(r0, 0x22080Fu, Disp, r1);
+ TestLeaBaseDisp(r1, 0x10000Fu, Disp, r2);
+ TestLeaBaseDisp(r2, 0x20000Fu, Disp, r3);
+ TestLeaBaseDisp(r3, 0x30000Fu, Disp, r4);
+ TestLeaBaseDisp(r4, 0x40000Fu, Disp, r5);
+ TestLeaBaseDisp(r5, 0x50000Fu, Disp, r6);
+ TestLeaBaseDisp(r6, 0x60000Fu, Disp, r7);
+ TestLeaBaseDisp(r7, 0x11000Fu, Disp, r8);
+ TestLeaBaseDisp(r8, 0x11200Fu, Disp, r10);
+ TestLeaBaseDisp(r9, 0x000000u, Disp, r10);
+ TestLeaBaseDisp(r10, 0x22000Fu, Disp, r11);
+ TestLeaBaseDisp(r11, 0x22030Fu, Disp, r12);
+ TestLeaBaseDisp(r12, 0x22040Fu, Disp, r13);
+ TestLeaBaseDisp(r13, 0x22050Fu, Disp, r14);
+ TestLeaBaseDisp(r14, 0x22060Fu, Disp, r15);
+ TestLeaBaseDisp(r15, 0x22070Fu, Disp, r1);
+ }
+
+ // esp is not a valid index register.
+ // ebp is not valid in this addressing mode (rm = 0).
+ for (const int32_t Disp :
+ {0x00, 0x06, -0x06, 0x0600, -0x6000, 0x6000000, -0x6000000}) {
+ TestLeaIndex32bitDisp(r1, 0x2000u, Disp, r2, r3, r4, r6);
+ TestLeaIndex32bitDisp(r2, 0x4010u, Disp, r3, r4, r6, r7);
+ TestLeaIndex32bitDisp(r3, 0x6020u, Disp, r4, r6, r7, r5);
+ TestLeaIndex32bitDisp(r4, 0x8030u, Disp, r6, r7, r5, r10);
+ TestLeaIndex32bitDisp(r6, 0xA040u, Disp, r7, r5, r10, r1);
+ TestLeaIndex32bitDisp(r7, 0xC050u, Disp, r5, r10, r1, r11);
+ TestLeaIndex32bitDisp(r8, 0xC060u, Disp, r10, r1, r11, r12);
+ TestLeaIndex32bitDisp(r9, 0x0000u, Disp, r1, r11, r12, r13);
+ TestLeaIndex32bitDisp(r10, 0xC008u, Disp, r11, r12, r13, r14);
+ TestLeaIndex32bitDisp(r11, 0xC009u, Disp, r12, r13, r14, r15);
+ TestLeaIndex32bitDisp(r12, 0xC00Au, Disp, r13, r14, r15, r1);
+ TestLeaIndex32bitDisp(r13, 0xC00Bu, Disp, r14, r15, r1, r2);
+ TestLeaIndex32bitDisp(r14, 0xC00Cu, Disp, r15, r1, r2, r3);
+ TestLeaIndex32bitDisp(r15, 0xC00Du, Disp, r1, r2, r3, r4);
+ }
+
+ for (const int32_t Disp :
+ {0x00, 0x06, -0x06, 0x0600, -0x6000, 0x6000000, -0x6000000}) {
+ TestLeaBaseIndexDisp(r1, 0x100000u, r2, 0x600u, Disp, r3, r4, r6, r7);
+ TestLeaBaseIndexDisp(r2, 0x200000u, r3, 0x500u, Disp, r4, r6, r7, r8);
+ TestLeaBaseIndexDisp(r3, 0x300000u, r4, 0x400u, Disp, r6, r7, r8, r5);
+ TestLeaBaseIndexDisp(r4, 0x400000u, r6, 0x300u, Disp, r7, r8, r5, r10);
+ TestLeaBaseIndexDisp(r6, 0x500000u, r7, 0x200u, Disp, r8, r5, r10, r11);
+ TestLeaBaseIndexDisp(r7, 0x600000u, r8, 0x100u, Disp, r5, r10, r11, r12);
+ TestLeaBaseIndexDisp(r8, 0x600000u, r9, 0x1A0u, Disp, r10, r11, r12, r13);
+ TestLeaBaseIndexDisp(r9, 0x000000u, r10, 0x1B0u, Disp, r11, r12, r13, r14);
+ TestLeaBaseIndexDisp(r10, 0x602000u, r11, 0x1C0u, Disp, r12, r13, r14, r15);
+ TestLeaBaseIndexDisp(r11, 0x603000u, r12, 0x1D0u, Disp, r13, r14, r15, r1);
+ TestLeaBaseIndexDisp(r12, 0x604000u, r13, 0x1E0u, Disp, r14, r15, r1, r2);
+ TestLeaBaseIndexDisp(r13, 0x605000u, r14, 0x1F0u, Disp, r15, r1, r2, r3);
+ TestLeaBaseIndexDisp(r14, 0x606000u, r15, 0x10Au, Disp, r1, r2, r3, r4);
+ TestLeaBaseIndexDisp(r15, 0x607000u, r1, 0x10Bu, Disp, r2, r3, r4, r6);
+
+ TestLeaBaseIndexDisp(r0, 0, r2, 0x600u, Disp, r3, r4, r6, r7);
+ TestLeaBaseIndexDisp(r0, 0, r3, 0x500u, Disp, r4, r6, r7, r8);
+ TestLeaBaseIndexDisp(r0, 0, r4, 0x400u, Disp, r6, r7, r8, r5);
+ TestLeaBaseIndexDisp(r0, 0, r6, 0x300u, Disp, r7, r8, r5, r10);
+ TestLeaBaseIndexDisp(r0, 0, r7, 0x200u, Disp, r8, r5, r10, r11);
+ TestLeaBaseIndexDisp(r0, 0, r8, 0x100u, Disp, r5, r10, r11, r12);
+ TestLeaBaseIndexDisp(r0, 0, r9, 0x000u, Disp, r10, r11, r12, r13);
+ TestLeaBaseIndexDisp(r0, 0, r10, 0x1B0u, Disp, r11, r12, r13, r14);
+ TestLeaBaseIndexDisp(r0, 0, r11, 0x1C0u, Disp, r12, r13, r14, r15);
+ TestLeaBaseIndexDisp(r0, 0, r12, 0x1D0u, Disp, r13, r14, r15, r1);
+ TestLeaBaseIndexDisp(r0, 0, r13, 0x1E0u, Disp, r14, r15, r1, r2);
+ TestLeaBaseIndexDisp(r0, 0, r14, 0x1F0u, Disp, r15, r1, r2, r3);
+ TestLeaBaseIndexDisp(r0, 0, r15, 0x10Au, Disp, r1, r2, r3, r4);
+ TestLeaBaseIndexDisp(r0, 0, r1, 0x10Bu, Disp, r2, r3, r4, r6);
+
+ TestLeaBaseIndexDisp(r5, 0x100000u, r2, 0x600u, Disp, r3, r4, r6, r7);
+ TestLeaBaseIndexDisp(r5, 0x200000u, r3, 0x500u, Disp, r4, r6, r7, r8);
+ TestLeaBaseIndexDisp(r5, 0x300000u, r4, 0x400u, Disp, r6, r7, r8, r1);
+ TestLeaBaseIndexDisp(r5, 0x400000u, r6, 0x300u, Disp, r7, r8, r1, r10);
+ TestLeaBaseIndexDisp(r5, 0x500000u, r7, 0x200u, Disp, r8, r1, r10, r11);
+ TestLeaBaseIndexDisp(r5, 0x600000u, r8, 0x100u, Disp, r1, r10, r11, r12);
+ TestLeaBaseIndexDisp(r5, 0x600000u, r9, 0x000u, Disp, r10, r11, r12, r13);
+ TestLeaBaseIndexDisp(r5, 0x601000u, r10, 0x1B0u, Disp, r11, r12, r13, r14);
+ TestLeaBaseIndexDisp(r5, 0x602000u, r11, 0x1C0u, Disp, r12, r13, r14, r15);
+ TestLeaBaseIndexDisp(r5, 0x603000u, r12, 0x1D0u, Disp, r13, r14, r15, r1);
+ TestLeaBaseIndexDisp(r5, 0x604000u, r13, 0x1E0u, Disp, r14, r15, r1, r2);
+ TestLeaBaseIndexDisp(r5, 0x605000u, r14, 0x1F0u, Disp, r15, r1, r2, r3);
+ TestLeaBaseIndexDisp(r5, 0x606000u, r15, 0x10Au, Disp, r1, r2, r3, r4);
+ TestLeaBaseIndexDisp(r5, 0x607000u, r1, 0x10Bu, Disp, r2, r3, r4, r6);
+
+ TestLeaBaseIndexDisp(r2, 0x100000u, r5, 0x600u, Disp, r3, r4, r6, r7);
+ TestLeaBaseIndexDisp(r3, 0x200000u, r5, 0x500u, Disp, r4, r6, r7, r8);
+ TestLeaBaseIndexDisp(r4, 0x300000u, r5, 0x400u, Disp, r6, r7, r8, r1);
+ TestLeaBaseIndexDisp(r6, 0x400000u, r5, 0x300u, Disp, r7, r8, r1, r10);
+ TestLeaBaseIndexDisp(r7, 0x500000u, r5, 0x200u, Disp, r8, r1, r10, r11);
+ TestLeaBaseIndexDisp(r8, 0x600000u, r5, 0x100u, Disp, r1, r10, r11, r12);
+ TestLeaBaseIndexDisp(r9, 0x000000u, r5, 0x1A0u, Disp, r10, r11, r12, r13);
+ TestLeaBaseIndexDisp(r10, 0x601000u, r5, 0x1B0u, Disp, r11, r12, r13, r14);
+ TestLeaBaseIndexDisp(r11, 0x602000u, r5, 0x1C0u, Disp, r12, r13, r14, r15);
+ TestLeaBaseIndexDisp(r12, 0x603000u, r5, 0x1D0u, Disp, r13, r14, r15, r1);
+ TestLeaBaseIndexDisp(r13, 0x604000u, r5, 0x1E0u, Disp, r14, r15, r1, r2);
+ TestLeaBaseIndexDisp(r14, 0x605000u, r5, 0x1F0u, Disp, r15, r1, r2, r3);
+ TestLeaBaseIndexDisp(r15, 0x606000u, r5, 0x10Au, Disp, r1, r2, r3, r4);
+ TestLeaBaseIndexDisp(r1, 0x607000u, r5, 0x10Bu, Disp, r2, r3, r4, r6);
+
+ TestLeaBaseIndexDisp(r0, 0, r5, 0xC0BEBEEF, Disp, r2, r3, r4, r6);
+ }
+
+// Absolute addressing mode is tested in the Low Level tests. The encoding used
+// by the assembler has different meanings in x86-32 and x86-64.
+//
+// Every helper macro defined above is undefined here, under the name it was
+// defined with, so that none of them leaks into the tests that follow.
+#undef TestLeaBaseIndexDisp
+#undef TestLeaIndex32bitDisp
+#undef TestLeaBaseDisp
+}
+
+TEST_F(AssemblerX8664LowLevelTest, LeaAbsolute) {
+ // Byte-level check of lea with an absolute address operand. Nothing is
+ // executed; the emitted bytes are compared against the expected encoding:
+ // opcode 0x8D, a ModRM byte with mod=00, reg=Dst and rm=ebp, followed by
+ // the 32-bit Value in little-endian byte order (6 bytes in total).
+#define TestLeaAbsolute(Dst, Value) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Value ")"; \
+ __ lea(IceType_i32, GPRRegister::Encoded_Reg_##Dst, \
+ Address(Address::ABSOLUTE, Value)); \
+ static constexpr uint32_t ByteCount = 6; \
+ ASSERT_EQ(ByteCount, codeBytesSize()) << TestString; \
+ static constexpr uint8_t Opcode = 0x8D; \
+ static constexpr uint8_t ModRM = \
+ /*mod=*/0x00 | /*reg*/ (GPRRegister::Encoded_Reg_##Dst << 3) | \
+ /*rm*/ GPRRegister::Encoded_Reg_ebp; \
+ ASSERT_TRUE(verifyBytes<ByteCount>( \
+ codeBytes(), Opcode, ModRM, (Value)&0xFF, (Value >> 8) & 0xFF, \
+ (Value >> 16) & 0xFF, (Value >> 24) & 0xFF)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ TestLeaAbsolute(eax, 0x11BEEF22);
+ TestLeaAbsolute(ebx, 0x33BEEF44);
+ TestLeaAbsolute(ecx, 0x55BEEF66);
+ TestLeaAbsolute(edx, 0x77BEEF88);
+ TestLeaAbsolute(esi, 0x99BEEFAA);
+ TestLeaAbsolute(edi, 0xBBBEEFBB);
+
+ // Undefine the helper under its real name so it does not leak past this test.
+#undef TestLeaAbsolute
+}
+
+TEST_F(AssemblerX8664Test, Cmov) { // Conditional move from a register and from memory, for true and false outcomes.
+#define TestRegReg(C, Dest, IsTrue, Src0, Value0, Src1, Value1) /* Register source: Dest starts as Value0 and becomes Value1 only if C holds. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #C ", " #Dest ", " #IsTrue ", " #Src0 ", " #Value0 ", " #Src1 \
+ ", " #Value1 ")"; \
+ __ mov(IceType_i32, Encoded_GPR_##Src0(), Immediate(Value0)); \
+ __ mov(IceType_i32, Encoded_GPR_##Src1(), Immediate(Value1)); \
+ __ mov(IceType_i32, Encoded_GPR_##Dest(), Immediate(Value0)); \
+ __ cmp(IceType_i32, Encoded_GPR_##Src0(), Encoded_GPR_##Src1()); \
+ __ cmov(IceType_i32, Cond::Br_##C, Encoded_GPR_##Dest(), /* the instruction under test */ \
+ Encoded_GPR_##Src1()); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ ASSERT_EQ((IsTrue) ? (Value1) : (Value0), test.Dest()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+#define TestRegAddr(C, Dest, IsTrue, Src0, Value0, Value1) /* Memory source: Value1 lives in the dword T0, used by both cmp and cmov. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #C ", " #Dest ", " #IsTrue ", " #Src0 ", " #Value0 \
+ ", Addr, " #Value1 ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = Value1; \
+ __ mov(IceType_i32, Encoded_GPR_##Src0(), Immediate(Value0)); \
+ __ mov(IceType_i32, Encoded_GPR_##Dest(), Immediate(Value0)); \
+ __ cmp(IceType_i32, Encoded_GPR_##Src0(), dwordAddress(T0)); \
+ __ cmov(IceType_i32, Cond::Br_##C, Encoded_GPR_##Dest(), /* the instruction under test */ \
+ dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.run(); \
+ ASSERT_EQ((IsTrue) ? (Value1) : (Value0), test.Dest()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+#define TestValue(C, Dest, IsTrue, Src0, Value0, Src1, Value1) /* Runs one row against both source kinds. */ \
+ do { \
+ TestRegReg(C, Dest, IsTrue, Src0, Value0, Src1, Value1); \
+ TestRegAddr(C, Dest, IsTrue, Src0, Value0, Value1); \
+ } while (0)
+
+#define TestImpl(Dest, Src0, Src1) /* One true and one false row per condition. NOTE(review): there are no rows for condition g. */ \
+ do { \
+ TestValue(o, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(o, Dest, 0u, Src0, 0x1u, Src1, 0x10000000u); \
+ TestValue(no, Dest, 1u, Src0, 0x1u, Src1, 0x10000000u); \
+ TestValue(no, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(b, Dest, 1u, Src0, 0x1, Src1, 0x80000000u); \
+ TestValue(b, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(ae, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(ae, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(e, Dest, 1u, Src0, 0x1u, Src1, 0x1u); \
+ TestValue(e, Dest, 0u, Src0, 0x1u, Src1, 0x11111u); \
+ TestValue(ne, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(ne, Dest, 0u, Src0, 0x1u, Src1, 0x1u); \
+ TestValue(be, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(be, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(a, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(a, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(s, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(s, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(ns, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(ns, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(p, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(p, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(np, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(np, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(l, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(l, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(ge, Dest, 1u, Src0, 0x1u, Src1, 0x80000000u); \
+ TestValue(ge, Dest, 0u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(le, Dest, 1u, Src0, 0x80000000u, Src1, 0x1u); \
+ TestValue(le, Dest, 0u, Src0, 0x1u, Src1, 0x80000000u); \
+ } while (0)
+
+ TestImpl(r1, r2, r3); // Only this one register triple is exercised.
+
+#undef TestImpl
+#undef TestValue
+#undef TestRegAddr
+#undef TestRegReg
+}
+
+TEST_F(AssemblerX8664LowLevelTest, RepMovsb) {
+ // Expected encoding of rep_movsb(): a single prefix byte followed by a
+ // single opcode byte.
+ static constexpr uint8_t RepPrefix = 0xF3;
+ static constexpr uint8_t MovsbOpcode = 0xA4;
+ static constexpr uint32_t ExpectedSize = 2;
+
+ __ rep_movsb();
+
+ ASSERT_EQ(ExpectedSize, codeBytesSize());
+ verifyBytes<ExpectedSize>(codeBytes(), RepPrefix, MovsbOpcode);
+}
+
+TEST_F(AssemblerX8664Test, MovssXmmAddr) {
+ // Emits movss Xmm, [T0] for the given FloatLength (32 selects float, 64
+ // selects double), seeds T0 with Value, runs the code, and expects Xmm to
+ // hold Value afterwards.
+#define TestMovssXmmAddrFloatLength(FloatLength, Xmm, Value) \
+ do { \
+ static_assert((FloatLength) == 32 || (FloatLength) == 64, \
+ "Invalid fp length " #FloatLength); \
+ using Type = std::conditional<FloatLength == 32, float, double>::type; \
+ \
+ static constexpr char TestString[] = "(" #FloatLength ", " #Xmm ")"; \
+ static constexpr bool IsDouble = std::is_same<Type, double>::value; \
+ const uint32_t T0 = allocateQword(); \
+ const Type V0 = Value; \
+ \
+ __ movss(IceType_f##FloatLength, Encoded_Xmm_##Xmm(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ if (IsDouble) { \
+ test.setQwordTo(T0, static_cast<double>(V0)); \
+ } else { \
+ test.setDwordTo(T0, static_cast<float>(V0)); \
+ } \
+ test.run(); \
+ ASSERT_DOUBLE_EQ(Value, test.Xmm<Type>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Runs the check above for every xmm register and a handful of values.
+#define TestMovssXmmAddr(FloatLength) \
+ do { \
+ using Type = std::conditional<FloatLength == 32, float, double>::type; \
+ for (const Type Value : {0.0, -0.0, 1.0, -1.0, 3.14, 99999.9999}) { \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm0, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm1, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm2, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm3, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm4, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm5, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm6, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm7, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm8, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm9, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm10, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm11, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm12, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm13, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm14, Value); \
+ TestMovssXmmAddrFloatLength(FloatLength, xmm15, Value); \
+ } \
+ } while (0)
+
+ TestMovssXmmAddr(32);
+ TestMovssXmmAddr(64);
+
+ // Undefine both helper macros so neither leaks into later tests.
+#undef TestMovssXmmAddr
+#undef TestMovssXmmAddrFloatLength
+}
+
+TEST_F(AssemblerX8664Test, MovssAddrXmm) {
+ // Emits movss Xmm, [T0] for the given FloatLength (32 selects float, 64
+ // selects double), seeds T0 with Value and T1 with a quiet NaN, runs the
+ // code, and expects Xmm to hold Value afterwards.
+ //
+ // NOTE(review): despite the test name, only the xmm <- memory form of movss
+ // is emitted here. T1 is seeded but never used as a store destination, so the
+ // memory <- xmm form appears to be untested. Confirm, and extend the macro
+ // with a store to T1 plus a read-back of T1 if that was the intent.
+#define TestMovssAddrXmmFloatLength(FloatLength, Xmm, Value) \
+ do { \
+ static_assert((FloatLength) == 32 || (FloatLength) == 64, \
+ "Invalid fp length " #FloatLength); \
+ using Type = std::conditional<FloatLength == 32, float, double>::type; \
+ \
+ static constexpr char TestString[] = "(" #FloatLength ", " #Xmm ")"; \
+ static constexpr bool IsDouble = std::is_same<Type, double>::value; \
+ const uint32_t T0 = allocateQword(); \
+ const Type V0 = Value; \
+ const uint32_t T1 = allocateQword(); \
+ static_assert(std::numeric_limits<Type>::has_quiet_NaN, \
+ "f" #FloatLength " does not have quiet nan."); \
+ const Type V1 = std::numeric_limits<Type>::quiet_NaN(); \
+ \
+ __ movss(IceType_f##FloatLength, Encoded_Xmm_##Xmm(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ if (IsDouble) { \
+ test.setQwordTo(T0, static_cast<double>(V0)); \
+ test.setQwordTo(T1, static_cast<double>(V1)); \
+ } else { \
+ test.setDwordTo(T0, static_cast<float>(V0)); \
+ test.setDwordTo(T1, static_cast<float>(V1)); \
+ } \
+ test.run(); \
+ ASSERT_DOUBLE_EQ(Value, test.Xmm<Type>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Runs the check above for every xmm register and a handful of values.
+#define TestMovssAddrXmm(FloatLength) \
+ do { \
+ using Type = std::conditional<FloatLength == 32, float, double>::type; \
+ for (const Type Value : {0.0, -0.0, 1.0, -1.0, 3.14, 99999.9999}) { \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm0, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm1, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm2, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm3, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm4, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm5, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm6, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm7, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm8, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm9, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm10, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm11, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm12, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm13, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm14, Value); \
+ TestMovssAddrXmmFloatLength(FloatLength, xmm15, Value); \
+ } \
+ } while (0)
+
+ TestMovssAddrXmm(32);
+ TestMovssAddrXmm(64);
+
+ // Undefine both helper macros so neither leaks into later tests.
+#undef TestMovssAddrXmm
+#undef TestMovssAddrXmmFloatLength
+}
+
+TEST_F(AssemblerX8664Test, MovssXmmXmm) {
+ // Loads Value into Src and a quiet NaN into Dst (both from memory), emits
+ // movss Dst, Src for the given FloatLength (32 selects float, 64 selects
+ // double), and expects Dst to hold Value afterwards.
+#define TestMovssXmmXmmFloatLength(FloatLength, Src, Dst, Value) \
+ do { \
+ static_assert((FloatLength) == 32 || (FloatLength) == 64, \
+ "Invalid fp length " #FloatLength); \
+ using Type = std::conditional<FloatLength == 32, float, double>::type; \
+ \
+ static constexpr char TestString[] = \
+ "(" #FloatLength ", " #Src ", " #Dst ")"; \
+ static constexpr bool IsDouble = std::is_same<Type, double>::value; \
+ const uint32_t T0 = allocateQword(); \
+ const Type V0 = Value; \
+ const uint32_t T1 = allocateQword(); \
+ static_assert(std::numeric_limits<Type>::has_quiet_NaN, \
+ "f" #FloatLength " does not have quiet nan."); \
+ const Type V1 = std::numeric_limits<Type>::quiet_NaN(); \
+ \
+ __ movss(IceType_f##FloatLength, Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movss(IceType_f##FloatLength, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ movss(IceType_f##FloatLength, Encoded_Xmm_##Dst(), \
+ Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ if (IsDouble) { \
+ test.setQwordTo(T0, static_cast<double>(V0)); \
+ test.setQwordTo(T1, static_cast<double>(V1)); \
+ } else { \
+ test.setDwordTo(T0, static_cast<float>(V0)); \
+ test.setDwordTo(T1, static_cast<float>(V1)); \
+ } \
+ test.run(); \
+ ASSERT_DOUBLE_EQ(Value, test.Dst<Type>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Runs the check above over a ring of (Src, Dst) register pairs so that
+ // every xmm register is used once as source and once as destination.
+#define TestMovssXmmXmm(FloatLength) \
+ do { \
+ using Type = std::conditional<FloatLength == 32, float, double>::type; \
+ for (const Type Value : {0.0, -0.0, 1.0, -1.0, 3.14, 99999.9999}) { \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm0, xmm1, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm1, xmm2, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm2, xmm3, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm3, xmm4, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm4, xmm5, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm5, xmm6, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm6, xmm7, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm7, xmm8, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm8, xmm9, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm9, xmm10, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm10, xmm11, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm11, xmm12, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm12, xmm13, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm13, xmm14, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm14, xmm15, Value); \
+ TestMovssXmmXmmFloatLength(FloatLength, xmm15, xmm0, Value); \
+ } \
+ } while (0)
+
+ TestMovssXmmXmm(32);
+ TestMovssXmmXmm(64);
+
+ // Undefine both helper macros so neither leaks into later tests.
+#undef TestMovssXmmXmm
+#undef TestMovssXmmXmmFloatLength
+}
+
+TEST_F(AssemblerX8664Test, MovdToXmm) {
+ // Register source: moves the immediate Value into GPR Src, preloads Dst from
+ // a qword seeded with 0xFFFFFFFF00000000, emits movd Dst, Src, and expects
+ // Dst read back as a uint64_t to equal Value (so the seeded upper dword must
+ // no longer be visible).
+#define TestMovdXmmReg(Src, Dst, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Src ", " #Dst ")"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = 0xFFFFFFFF00000000ull; \
+ \
+ __ mov(IceType_i32, Encoded_GPR_##Src(), Immediate(Value)); \
+ __ movss(IceType_f64, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movd(Encoded_Xmm_##Dst(), Encoded_GPR_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(Value, test.Dst<uint64_t>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Memory source: same check as above, except that Value is read by movd from
+ // the dword slot T0 while the 0xFFFFFFFF00000000 preload comes from T1.
+#define TestMovdXmmAddr(Dst, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Dst ", Addr)"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint32_t V0 = Value; \
+ const uint32_t T1 = allocateQword(); \
+ const uint64_t V1 = 0xFFFFFFFF00000000ull; \
+ \
+ __ movss(IceType_f64, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ movd(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setDwordTo(T0, V0); \
+ test.setQwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Value, test.Dst<uint64_t>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Runs both forms for one destination register over several 32-bit values.
+ // NOTE(review): r9 is absent from the GPR list below; presumably it is
+ // reserved by the test harness -- confirm.
+#define TestMovd(Dst) \
+ do { \
+ for (uint32_t Value : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) { \
+ TestMovdXmmReg(r1, Dst, Value); \
+ TestMovdXmmReg(r2, Dst, Value); \
+ TestMovdXmmReg(r3, Dst, Value); \
+ TestMovdXmmReg(r4, Dst, Value); \
+ TestMovdXmmReg(r5, Dst, Value); \
+ TestMovdXmmReg(r6, Dst, Value); \
+ TestMovdXmmReg(r7, Dst, Value); \
+ TestMovdXmmReg(r8, Dst, Value); \
+ TestMovdXmmReg(r10, Dst, Value); \
+ TestMovdXmmReg(r11, Dst, Value); \
+ TestMovdXmmReg(r12, Dst, Value); \
+ TestMovdXmmReg(r13, Dst, Value); \
+ TestMovdXmmReg(r14, Dst, Value); \
+ TestMovdXmmReg(r15, Dst, Value); \
+ TestMovdXmmAddr(Dst, Value); \
+ } \
+ } while (0)
+
+ TestMovd(xmm0);
+ TestMovd(xmm1);
+ TestMovd(xmm2);
+ TestMovd(xmm3);
+ TestMovd(xmm4);
+ TestMovd(xmm5);
+ TestMovd(xmm6);
+ TestMovd(xmm7);
+ TestMovd(xmm8);
+ TestMovd(xmm9);
+ TestMovd(xmm10);
+ TestMovd(xmm11);
+ TestMovd(xmm12);
+ TestMovd(xmm13);
+ TestMovd(xmm14);
+ TestMovd(xmm15);
+
+#undef TestMovdXmmAddr
+#undef TestMovdXmmReg
+#undef TestMovd
+}
+
+TEST_F(AssemblerX8664Test, MovdFromXmm) {
+ // Register destination: loads Src from slot T0 (seeded with Value) and emits
+ // movd Dst, Src, where Dst is a GPR.
+ // NOTE(review): the assertion reads back T0, the slot this macro itself
+ // seeded with Value, rather than the Dst register, so as written it does not
+ // appear to observe the result of movd -- confirm and assert on Dst instead.
+ // NOTE(review): T0 comes from allocateDword() but is loaded with an
+ // IceType_f64 movss; confirm the wider load from a dword slot is intended.
+#define TestMovdRegXmm(Src, Dst, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Src ", " #Dst ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = Value; \
+ \
+ __ movss(IceType_f64, Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movd(Encoded_GPR_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setDwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(Value, test.contentsOfDword(T0)) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Memory destination: loads Src from T0 (seeded with Value), stores it with
+ // movd into T1 (seeded with ~Value), and expects T1 to contain Value.
+#define TestMovdAddrXmm(Src, Value) \
+ do { \
+ assert(((Value)&0xFFFFFFFF) == (Value)); \
+ static constexpr char TestString[] = "(" #Src ", Addr)"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = Value; \
+ const uint32_t T1 = allocateDword(); \
+ const uint32_t V1 = ~(Value); \
+ \
+ __ movss(IceType_f64, Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movd(dwordAddress(T1), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setDwordTo(T0, V0); \
+ test.setDwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Value, test.contentsOfDword(T1)) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Runs both forms for one source register over several 32-bit values.
+ // NOTE(review): r9 is absent from the GPR list below; presumably it is
+ // reserved by the test harness -- confirm.
+#define TestMovd(Src) \
+ do { \
+ for (uint32_t Value : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) { \
+ TestMovdRegXmm(Src, r1, Value); \
+ TestMovdRegXmm(Src, r2, Value); \
+ TestMovdRegXmm(Src, r3, Value); \
+ TestMovdRegXmm(Src, r4, Value); \
+ TestMovdRegXmm(Src, r5, Value); \
+ TestMovdRegXmm(Src, r6, Value); \
+ TestMovdRegXmm(Src, r7, Value); \
+ TestMovdRegXmm(Src, r8, Value); \
+ TestMovdRegXmm(Src, r10, Value); \
+ TestMovdRegXmm(Src, r11, Value); \
+ TestMovdRegXmm(Src, r12, Value); \
+ TestMovdRegXmm(Src, r13, Value); \
+ TestMovdRegXmm(Src, r14, Value); \
+ TestMovdRegXmm(Src, r15, Value); \
+ TestMovdAddrXmm(Src, Value); \
+ } \
+ } while (0)
+
+ TestMovd(xmm0);
+ TestMovd(xmm1);
+ TestMovd(xmm2);
+ TestMovd(xmm3);
+ TestMovd(xmm4);
+ TestMovd(xmm5);
+ TestMovd(xmm6);
+ TestMovd(xmm7);
+ TestMovd(xmm8);
+ TestMovd(xmm9);
+ TestMovd(xmm10);
+ TestMovd(xmm11);
+ TestMovd(xmm12);
+ TestMovd(xmm13);
+ TestMovd(xmm14);
+ TestMovd(xmm15);
+
+#undef TestMovdAddrXmm
+#undef TestMovdRegXmm
+#undef TestMovd
+}
+
+TEST_F(AssemblerX8664Test, MovqXmmAddr) {
+ // Preloads Dst with ~Value from T1, emits movq Dst, [T0] with T0 seeded with
+ // Value, and expects Dst read back as a uint64_t to equal Value.
+#define TestMovq(Dst, Value) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", Addr)"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = Value; \
+ const uint32_t T1 = allocateQword(); \
+ const uint64_t V1 = ~(Value); \
+ \
+ __ movss(IceType_f64, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ movq(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.setQwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Value, test.Dst<uint64_t>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Values are 64 bits wide and include patterns with the upper dword set, so
+ // that a move of only the low 32 bits would be detected. Value must be a
+ // uint64_t for ~(Value) in the macro to be a full 64-bit complement.
+ for (uint64_t Value : {0ull, 1ull, 0x7FFFFFFFull, 0x80000000ull,
+ 0xFFFFFFFFull, 0x0123456789ABCDEFull,
+ 0xFFFFFFFFFFFFFFFFull}) {
+ TestMovq(xmm0, Value);
+ TestMovq(xmm1, Value);
+ TestMovq(xmm2, Value);
+ TestMovq(xmm3, Value);
+ TestMovq(xmm4, Value);
+ TestMovq(xmm5, Value);
+ TestMovq(xmm6, Value);
+ TestMovq(xmm7, Value);
+ TestMovq(xmm8, Value);
+ TestMovq(xmm9, Value);
+ TestMovq(xmm10, Value);
+ TestMovq(xmm11, Value);
+ TestMovq(xmm12, Value);
+ TestMovq(xmm13, Value);
+ TestMovq(xmm14, Value);
+ TestMovq(xmm15, Value);
+ }
+
+#undef TestMovq
+}
+
+TEST_F(AssemblerX8664Test, MovqAddrXmm) {
+ // Loads Dst from T0 (seeded with Value), stores it with movq into T1 (seeded
+ // with ~Value), and checks both the register and the stored memory.
+ // The memory check covers the low dword of T1 only, which always differs
+ // between Value and ~Value.
+ // NOTE(review): a full 64-bit read-back of T1 would be stronger; switch to a
+ // qword accessor if the harness provides one.
+#define TestMovq(Dst, Value) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", Addr)"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = Value; \
+ const uint32_t T1 = allocateQword(); \
+ const uint64_t V1 = ~(Value); \
+ \
+ __ movq(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movq(dwordAddress(T1), Encoded_Xmm_##Dst()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.setQwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Value, test.Dst<uint64_t>()) << TestString << " value is " \
+ << Value; \
+ ASSERT_EQ(static_cast<uint32_t>(Value), test.contentsOfDword(T1)) \
+ << TestString << " value is " << Value; \
+ reset(); \
+ } while (0)
+
+ // Values are 64 bits wide and include patterns with the upper dword set, so
+ // that a move of only the low 32 bits would be detected. Value must be a
+ // uint64_t for ~(Value) in the macro to be a full 64-bit complement.
+ for (uint64_t Value : {0ull, 1ull, 0x7FFFFFFFull, 0x80000000ull,
+ 0xFFFFFFFFull, 0x0123456789ABCDEFull,
+ 0xFFFFFFFFFFFFFFFFull}) {
+ TestMovq(xmm0, Value);
+ TestMovq(xmm1, Value);
+ TestMovq(xmm2, Value);
+ TestMovq(xmm3, Value);
+ TestMovq(xmm4, Value);
+ TestMovq(xmm5, Value);
+ TestMovq(xmm6, Value);
+ TestMovq(xmm7, Value);
+ TestMovq(xmm8, Value);
+ TestMovq(xmm9, Value);
+ TestMovq(xmm10, Value);
+ TestMovq(xmm11, Value);
+ TestMovq(xmm12, Value);
+ TestMovq(xmm13, Value);
+ TestMovq(xmm14, Value);
+ TestMovq(xmm15, Value);
+ }
+
+#undef TestMovq
+}
+
+TEST_F(AssemblerX8664Test, MovqXmmXmm) {
+ // Loads Src from T0 (seeded with Value) and Dst from T1 (seeded with
+ // ~Value), emits movq Dst, Src, and expects Dst read back as a uint64_t to
+ // equal Value.
+#define TestMovq(Src, Dst, Value) \
+ do { \
+ static constexpr char TestString[] = "(" #Src ", " #Dst ")"; \
+ const uint32_t T0 = allocateQword(); \
+ const uint64_t V0 = Value; \
+ const uint32_t T1 = allocateQword(); \
+ const uint64_t V1 = ~(Value); \
+ \
+ __ movq(Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movq(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ movq(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ \
+ test.setQwordTo(T0, V0); \
+ test.setQwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Value, test.Dst<uint64_t>()) << TestString << " value is " \
+ << Value; \
+ reset(); \
+ } while (0)
+
+ // Values are 64 bits wide and include patterns with the upper dword set, so
+ // that a move of only the low 32 bits would be detected. Value must be a
+ // uint64_t for ~(Value) in the macro to be a full 64-bit complement.
+ for (uint64_t Value : {0ull, 1ull, 0x7FFFFFFFull, 0x80000000ull,
+ 0xFFFFFFFFull, 0x0123456789ABCDEFull,
+ 0xFFFFFFFFFFFFFFFFull}) {
+ TestMovq(xmm0, xmm1, Value);
+ TestMovq(xmm1, xmm2, Value);
+ TestMovq(xmm2, xmm3, Value);
+ TestMovq(xmm3, xmm4, Value);
+ TestMovq(xmm4, xmm5, Value);
+ TestMovq(xmm5, xmm6, Value);
+ TestMovq(xmm6, xmm7, Value);
+ TestMovq(xmm7, xmm8, Value);
+ TestMovq(xmm8, xmm9, Value);
+ TestMovq(xmm9, xmm10, Value);
+ TestMovq(xmm10, xmm11, Value);
+ TestMovq(xmm11, xmm12, Value);
+ TestMovq(xmm12, xmm13, Value);
+ TestMovq(xmm13, xmm14, Value);
+ TestMovq(xmm14, xmm15, Value);
+ TestMovq(xmm15, xmm0, Value);
+ }
+
+#undef TestMovq
+}
+
+TEST_F(AssemblerX8664Test, ArithSS) {
+ // Register/register form: loads V0 (from Value0) into Dst and V1 (from
+ // Value1) into Src, emits Inst Dst, Src for the given FloatSize (32 selects
+ // float, 64 selects double), and expects Dst to equal V0 Op V1.
+#define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op) \
+ do { \
+ static_assert(FloatSize == 32 || FloatSize == 64, \
+ "Invalid fp size " #FloatSize); \
+ static constexpr char TestString[] = \
+ "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1 \
+ ", " #Inst ", " #Op ")"; \
+ static constexpr bool IsDouble = FloatSize == 64; \
+ using Type = std::conditional<IsDouble, double, float>::type; \
+ const uint32_t T0 = allocateQword(); \
+ const Type V0 = Value0; \
+ const uint32_t T1 = allocateQword(); \
+ const Type V1 = Value1; \
+ \
+ __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movss(IceType_f##FloatSize, Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ if (IsDouble) { \
+ test.setQwordTo(T0, static_cast<double>(V0)); \
+ test.setQwordTo(T1, static_cast<double>(V1)); \
+ } else { \
+ test.setDwordTo(T0, static_cast<float>(V0)); \
+ test.setDwordTo(T1, static_cast<float>(V1)); \
+ } \
+ \
+ test.run(); \
+ \
+ ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Register/memory form: same check as above, except that the second operand
+ // V1 is read by Inst directly from the memory slot T1.
+#define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op) \
+ do { \
+ static_assert(FloatSize == 32 || FloatSize == 64, \
+ "Invalid fp size " #FloatSize); \
+ static constexpr char TestString[] = \
+ "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst \
+ ", " #Op ")"; \
+ static constexpr bool IsDouble = FloatSize == 64; \
+ using Type = std::conditional<IsDouble, double, float>::type; \
+ const uint32_t T0 = allocateQword(); \
+ const Type V0 = Value0; \
+ const uint32_t T1 = allocateQword(); \
+ const Type V1 = Value1; \
+ \
+ __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ if (IsDouble) { \
+ test.setQwordTo(T0, static_cast<double>(V0)); \
+ test.setQwordTo(T1, static_cast<double>(V1)); \
+ } else { \
+ test.setDwordTo(T0, static_cast<float>(V0)); \
+ test.setDwordTo(T1, static_cast<float>(V1)); \
+ } \
+ \
+ test.run(); \
+ \
+ ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Exercises addss, subss, mulss and divss in both forms; the register form
+ // uses Dst0 and the memory form uses Dst1 as destination.
+#define TestArithSS(FloatSize, Src, Dst0, Dst1) \
+ do { \
+ TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +); \
+ TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +); \
+ TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -); \
+ TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -); \
+ TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *); \
+ TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *); \
+ TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / ); \
+ TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / ); \
+ } while (0)
+
+ // Runs the whole set for both the 32-bit and the 64-bit float size.
+#define TestImpl(Src, Dst0, Dst1) \
+ do { \
+ TestArithSS(32, Src, Dst0, Dst1); \
+ TestArithSS(64, Src, Dst0, Dst1); \
+ } while (0)
+
+ TestImpl(xmm0, xmm1, xmm2);
+ TestImpl(xmm1, xmm2, xmm3);
+ TestImpl(xmm2, xmm3, xmm4);
+ TestImpl(xmm3, xmm4, xmm5);
+ TestImpl(xmm4, xmm5, xmm6);
+ TestImpl(xmm5, xmm6, xmm7);
+ TestImpl(xmm6, xmm7, xmm8);
+ TestImpl(xmm7, xmm8, xmm9);
+ TestImpl(xmm8, xmm9, xmm10);
+ TestImpl(xmm9, xmm10, xmm11);
+ TestImpl(xmm10, xmm11, xmm12);
+ TestImpl(xmm11, xmm12, xmm13);
+ TestImpl(xmm12, xmm13, xmm14);
+ TestImpl(xmm13, xmm14, xmm15);
+ TestImpl(xmm14, xmm15, xmm0);
+ TestImpl(xmm15, xmm0, xmm1);
+
+#undef TestImpl
+#undef TestArithSS
+#undef TestArithSSXmmAddr
+#undef TestArithSSXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, MovupsXmmAddr) {
+ // Emits movups Dst, [InputSlot], seeds the slot with a four-lane pattern
+ // (1, -1, quiet NaN, infinity), and expects Dst to hold that same pattern.
+#define TestMovups(Dst) \
+ do { \
+ static constexpr char Label[] = "(" #Dst ")"; \
+ const uint32_t InputSlot = allocateDqword(); \
+ const Dqword Input(1.0f, -1.0, std::numeric_limits<float>::quiet_NaN(), \
+ std::numeric_limits<float>::infinity()); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(InputSlot)); \
+ \
+ AssembledTest Assembled = assemble(); \
+ Assembled.setDqwordTo(InputSlot, Input); \
+ Assembled.run(); \
+ \
+ ASSERT_EQ(Input, Assembled.Dst<Dqword>()) << Label; \
+ reset(); \
+ } while (0)
+
+ TestMovups(xmm0);
+ TestMovups(xmm1);
+ TestMovups(xmm2);
+ TestMovups(xmm3);
+ TestMovups(xmm4);
+ TestMovups(xmm5);
+ TestMovups(xmm6);
+ TestMovups(xmm7);
+ TestMovups(xmm8);
+ TestMovups(xmm9);
+ TestMovups(xmm10);
+ TestMovups(xmm11);
+ TestMovups(xmm12);
+ TestMovups(xmm13);
+ TestMovups(xmm14);
+ TestMovups(xmm15);
+
+#undef TestMovups
+}
+
+TEST_F(AssemblerX8664Test, MovupsAddrXmm) {
+ // Loads Src from InputSlot, stores it with movups into OutputSlot (seeded
+ // with zeroes), and expects OutputSlot to contain the input pattern.
+#define TestMovups(Src) \
+ do { \
+ static constexpr char Label[] = "(" #Src ")"; \
+ const uint32_t InputSlot = allocateDqword(); \
+ const Dqword Input(1.0f, -1.0, std::numeric_limits<float>::quiet_NaN(), \
+ std::numeric_limits<float>::infinity()); \
+ const uint32_t OutputSlot = allocateDqword(); \
+ const Dqword Zeroes(0.0, 0.0, 0.0, 0.0); \
+ \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(InputSlot)); \
+ __ movups(dwordAddress(OutputSlot), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest Assembled = assemble(); \
+ Assembled.setDqwordTo(InputSlot, Input); \
+ Assembled.setDqwordTo(OutputSlot, Zeroes); \
+ Assembled.run(); \
+ \
+ ASSERT_EQ(Input, Assembled.contentsOfDqword(OutputSlot)) << Label; \
+ reset(); \
+ } while (0)
+
+ TestMovups(xmm0);
+ TestMovups(xmm1);
+ TestMovups(xmm2);
+ TestMovups(xmm3);
+ TestMovups(xmm4);
+ TestMovups(xmm5);
+ TestMovups(xmm6);
+ TestMovups(xmm7);
+ TestMovups(xmm8);
+ TestMovups(xmm9);
+ TestMovups(xmm10);
+ TestMovups(xmm11);
+ TestMovups(xmm12);
+ TestMovups(xmm13);
+ TestMovups(xmm14);
+ TestMovups(xmm15);
+
+#undef TestMovups
+}
+
+TEST_F(AssemblerX8664Test, MovupsXmmXmm) {
+ // Loads Src with the input pattern and Dst with zeroes (both from memory),
+ // emits movups Dst, Src, and expects Dst to hold the input pattern.
+#define TestMovups(Dst, Src) \
+ do { \
+ static constexpr char Label[] = "(" #Dst ", " #Src ")"; \
+ const uint32_t InputSlot = allocateDqword(); \
+ const Dqword Input(1.0f, -1.0, std::numeric_limits<float>::quiet_NaN(), \
+ std::numeric_limits<float>::infinity()); \
+ const uint32_t ZeroSlot = allocateDqword(); \
+ const Dqword Zeroes(0.0, 0.0, 0.0, 0.0); \
+ \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(InputSlot)); \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(ZeroSlot)); \
+ __ movups(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest Assembled = assemble(); \
+ Assembled.setDqwordTo(InputSlot, Input); \
+ Assembled.setDqwordTo(ZeroSlot, Zeroes); \
+ Assembled.run(); \
+ \
+ ASSERT_EQ(Input, Assembled.Dst<Dqword>()) << Label; \
+ reset(); \
+ } while (0)
+
+ TestMovups(xmm0, xmm1);
+ TestMovups(xmm1, xmm2);
+ TestMovups(xmm2, xmm3);
+ TestMovups(xmm3, xmm4);
+ TestMovups(xmm4, xmm5);
+ TestMovups(xmm5, xmm6);
+ TestMovups(xmm6, xmm7);
+ TestMovups(xmm7, xmm8);
+ TestMovups(xmm8, xmm9);
+ TestMovups(xmm9, xmm10);
+ TestMovups(xmm10, xmm11);
+ TestMovups(xmm11, xmm12);
+ TestMovups(xmm12, xmm13);
+ TestMovups(xmm13, xmm14);
+ TestMovups(xmm14, xmm15);
+ TestMovups(xmm15, xmm0);
+
+#undef TestMovups
+}
+
+TEST_F(AssemblerX8664Test, MovapsXmmXmm) {
+ // Loads Src with the input pattern and Dst with zeroes (both via movups from
+ // memory), emits movaps Dst, Src, and expects Dst to hold the input pattern.
+#define TestMovaps(Dst, Src) \
+ do { \
+ static constexpr char Label[] = "(" #Dst ", " #Src ")"; \
+ const uint32_t InputSlot = allocateDqword(); \
+ const Dqword Input(1.0f, -1.0, std::numeric_limits<float>::quiet_NaN(), \
+ std::numeric_limits<float>::infinity()); \
+ const uint32_t ZeroSlot = allocateDqword(); \
+ const Dqword Zeroes(0.0, 0.0, 0.0, 0.0); \
+ \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(InputSlot)); \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(ZeroSlot)); \
+ __ movaps(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest Assembled = assemble(); \
+ Assembled.setDqwordTo(InputSlot, Input); \
+ Assembled.setDqwordTo(ZeroSlot, Zeroes); \
+ Assembled.run(); \
+ \
+ ASSERT_EQ(Input, Assembled.Dst<Dqword>()) << Label; \
+ reset(); \
+ } while (0)
+
+ TestMovaps(xmm0, xmm1);
+ TestMovaps(xmm1, xmm2);
+ TestMovaps(xmm2, xmm3);
+ TestMovaps(xmm3, xmm4);
+ TestMovaps(xmm4, xmm5);
+ TestMovaps(xmm5, xmm6);
+ TestMovaps(xmm6, xmm7);
+ TestMovaps(xmm7, xmm8);
+ TestMovaps(xmm8, xmm9);
+ TestMovaps(xmm9, xmm10);
+ TestMovaps(xmm10, xmm11);
+ TestMovaps(xmm11, xmm12);
+ TestMovaps(xmm12, xmm13);
+ TestMovaps(xmm13, xmm14);
+ TestMovaps(xmm14, xmm15);
+ TestMovaps(xmm15, xmm0);
+
+#undef TestMovaps
+}
+
+TEST_F(AssemblerX8664Test, PArith) {
+ // Register/register form: loads V0 into Dst and V1 into Src, emits
+ // Inst(IceType_i<Size>, Dst, Src), and expects Dst to equal
+ // packedAs<Type<Size>_t>(V0) Op V1. Value0 and Value1 are parenthesized
+ // constructor argument lists for Dqword.
+#define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
+ ", " #Type ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ // Register/memory form: as above, with V1 read by Inst directly from T1.
+#define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
+ ", " #Type ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ // Register/immediate form: the second operand is the immediate Imm.
+#define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type \
+ ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Immediate(Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ // pandn, register/register form: expects Dst to equal (~V0) & V1.
+#define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type \
+ ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ // pandn, register/memory form: expects Dst to equal (~V0) & V1.
+#define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size \
+ ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ // Runs every packed operation for one element Size. The shifts (psra, psrl,
+ // psll) and pmull only run when Size != 8, and pmuludq additionally requires
+ // Size != 16; padd, psub, pand, pandn, por and pxor run for every Size.
+ // NOTE(review): 0x0123456789ABCDEull has 15 hex digits; possibly
+ // 0x0123456789ABCDEFull was intended -- confirm. It is only test data.
+#define TestPArithSize(Dst, Src, Size) \
+ do { \
+ static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size."); \
+ if (Size != 8) { \
+ TestPArithXmmXmm( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \
+ TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \
+ TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ 3u, psra, >>, int, Size); \
+ TestPArithXmmXmm( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \
+ TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \
+ TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ 3u, psrl, >>, uint, Size); \
+ TestPArithXmmXmm( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \
+ TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \
+ TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ 3u, psll, <<, uint, Size); \
+ \
+ TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ pmull, *, int, Size); \
+ TestPArithXmmAddr( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ pmull, *, int, Size); \
+ if (Size != 16) { \
+ TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ pmuludq, *, uint, Size); \
+ TestPArithXmmAddr( \
+ Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ pmuludq, *, uint, Size); \
+ } \
+ } \
+ TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ padd, +, int, Size); \
+ TestPArithXmmAddr( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ padd, +, int, Size); \
+ TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ psub, -, int, Size); \
+ TestPArithXmmAddr( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ psub, -, int, Size); \
+ TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ pand, &, int, Size); \
+ TestPArithXmmAddr( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ pand, &, int, Size); \
+ \
+ TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ int, Size); \
+ TestPAndnXmmAddr( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ int, Size); \
+ \
+ TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ por, |, int, Size); \
+ TestPArithXmmAddr( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ por, |, int, Size); \
+ TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
+ uint64_t(0x8080404002020101ull)), \
+ Src, (uint64_t(0xFFFFFFFF00000000ull), \
+ uint64_t(0x0123456789ABCDEull)), \
+ pxor, ^, int, Size); \
+ TestPArithXmmAddr( \
+ Dst, \
+ (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
+ (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
+ pxor, ^, int, Size); \
+ } while (0)
+
+ // Runs the whole set for 8-, 16- and 32-bit elements. The parameters are
+ // named in the order TestPArithSize consumes them: the first register is the
+ // destination, the second is the source.
+#define TestPArith(Dst, Src) \
+ do { \
+ TestPArithSize(Dst, Src, 8); \
+ TestPArithSize(Dst, Src, 16); \
+ TestPArithSize(Dst, Src, 32); \
+ } while (0)
+
+ TestPArith(xmm0, xmm1);
+ TestPArith(xmm1, xmm2);
+ TestPArith(xmm2, xmm3);
+ TestPArith(xmm3, xmm4);
+ TestPArith(xmm4, xmm5);
+ TestPArith(xmm5, xmm6);
+ TestPArith(xmm6, xmm7);
+ TestPArith(xmm7, xmm8);
+ TestPArith(xmm8, xmm9);
+ TestPArith(xmm9, xmm10);
+ TestPArith(xmm10, xmm11);
+ TestPArith(xmm11, xmm12);
+ TestPArith(xmm12, xmm13);
+ TestPArith(xmm13, xmm14);
+ TestPArith(xmm14, xmm15);
+ TestPArith(xmm15, xmm0);
+
+#undef TestPArith
+#undef TestPArithSize
+#undef TestPAndnXmmAddr
+#undef TestPAndnXmmXmm
+#undef TestPArithXmmImm
+#undef TestPArithXmmAddr
+#undef TestPArithXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, ArithPS) {
+ // Checks packed floating-point add/sub/mul/div, the bitwise and/or/xor
+ // instructions and min/max for every consecutive pair of xmm registers.
+ // Each helper macro loads its operands with movups, emits one instruction,
+ // runs the assembled code and compares the destination register with the
+ // same operation evaluated on the host through packedAs<Type>.
+ //
+ // Value0/Value1 are parenthesized Dqword constructor argument lists.
+
+ // Register-register form; the emitter takes IceType_f32 as first argument.
+#define TestArithPSXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
+ ", " #Type ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Register-register form for emitters that take no type argument.
+#define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
+ ", " #Type ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Register-memory form for emitters that take no type argument; the second
+ // operand is read from dwordAddress(T1).
+#define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
+ ", " #Type ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Register-register min/max; the expected value comes from the host-side
+ // member function with the same name as the instruction.
+#define TestMinMaxPS(Dst, Value0, Src, Value1, Inst, Type) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type \
+ ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Register-memory form; the emitter takes IceType_f32 as first argument.
+#define TestArithPSXmmAddr(Dst, Value0, Value1, Inst, Op, Type) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
+ ", " #Type ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Runs every instruction covered by this test for one (Dst, Src) pair.
+#define TestArithPS(Dst, Src) \
+ do { \
+ TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), addps, +, float); \
+ TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \
+ (0.55, 0.43, 0.23, 1.21), addps, +, float); \
+ TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), subps, -, float); \
+ TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \
+ (0.55, 0.43, 0.23, 1.21), subps, -, float); \
+ TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), mulps, *, float); \
+ TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \
+ (0.55, 0.43, 0.23, 1.21), mulps, *, float); \
+ TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), divps, /, float); \
+ TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \
+ (0.55, 0.43, 0.23, 1.21), divps, /, float); \
+ TestArithPSXmmXmmUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), andps, &, float); \
+ TestArithPSXmmAddrUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), \
+ (0.55, 0.43, 0.23, 1.21), andps, &, float); \
+ TestArithPSXmmXmmUntyped(Dst, (1.0, -1000.0), Src, (0.55, 1.21), andpd, &, \
+ double); \
+ TestArithPSXmmAddrUntyped(Dst, (1.0, -1000.0), (0.55, 1.21), andpd, &, \
+ double); \
+ TestArithPSXmmXmmUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), orps, |, float); \
+ TestArithPSXmmXmmUntyped(Dst, (1.0, -1000.0), Src, (0.55, 1.21), orpd, |, \
+ double); \
+ TestMinMaxPS(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), minps, float); \
+ TestMinMaxPS(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), maxps, float); \
+ TestMinMaxPS(Dst, (1.0, -1000.0), Src, (0.55, 1.21), minpd, double); \
+ TestMinMaxPS(Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxpd, double); \
+ TestArithPSXmmXmmUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
+ (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \
+ TestArithPSXmmAddrUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), \
+ (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \
+ TestArithPSXmmXmmUntyped(Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorpd, ^, \
+ double); \
+ TestArithPSXmmAddrUntyped(Dst, (1.0, -1000.0), (0.55, 1.21), xorpd, ^, \
+ double); \
+ } while (0)
+
+ TestArithPS(xmm0, xmm1);
+ TestArithPS(xmm1, xmm2);
+ TestArithPS(xmm2, xmm3);
+ TestArithPS(xmm3, xmm4);
+ TestArithPS(xmm4, xmm5);
+ TestArithPS(xmm5, xmm6);
+ TestArithPS(xmm6, xmm7);
+ TestArithPS(xmm7, xmm8);
+ TestArithPS(xmm8, xmm9);
+ TestArithPS(xmm9, xmm10);
+ TestArithPS(xmm10, xmm11);
+ TestArithPS(xmm11, xmm12);
+ TestArithPS(xmm12, xmm13);
+ TestArithPS(xmm13, xmm14);
+ TestArithPS(xmm14, xmm15);
+ TestArithPS(xmm15, xmm0);
+
+ // Undefine every helper defined above so that none of them leaks into the
+ // tests that follow. Macro names are case sensitive.
+#undef TestArithPS
+#undef TestMinMaxPS
+#undef TestArithPSXmmAddrUntyped
+#undef TestArithPSXmmXmmUntyped
+#undef TestArithPSXmmAddr
+#undef TestArithPSXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, Blending) { /*
+ * Checks blendvps and pblendvb in their register-register and
+ * register-memory forms. Every helper loads the mask into xmm0 before the
+ * operands and compares the destination register with
+ * packedAs<Type>(V0).blendWith(V1, MaskValue).
+ */
+ using f32 = float;
+ using i8 = uint8_t;
+ /* f32 and i8 let one macro argument select both IceType_<Type> and the
+ * lane type given to packedAs<Type>. TestBlendingXmmXmm below is the
+ * register-register form. */
+#define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst \
+ ", " #Type ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ const uint32_t Mask = allocateDqword(); \
+ const Dqword MaskValue M; \
+ \
+ __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.setDqwordTo(Mask, MaskValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+ // Register-memory form: the second operand is read from dwordAddress(T1).
+#define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
+ ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ const uint32_t Mask = allocateDqword(); \
+ const Dqword MaskValue M; \
+ \
+ __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.setDqwordTo(Mask, MaskValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+ /* Runs both forms of blendvps (f32) and pblendvb (i8) for one register
+ * pair. Note the parameter order: Src comes first, then Dst. */
+#define TestBlending(Src, Dst) \
+ do { \
+ TestBlendingXmmXmm( \
+ Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0), \
+ (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \
+ blendvps, f32); \
+ TestBlendingXmmAddr( \
+ Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0), \
+ (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \
+ blendvps, f32); \
+ TestBlendingXmmXmm( \
+ Dst, \
+ (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \
+ Src, \
+ (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \
+ (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \
+ pblendvb, i8); \
+ TestBlendingXmmAddr( \
+ Dst, \
+ (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \
+ (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \
+ (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \
+ pblendvb, i8); \
+ } while (0)
+
+ /* xmm0 is taken. It is the implicit mask. */
+ TestBlending(xmm1, xmm2);
+ TestBlending(xmm2, xmm3);
+ TestBlending(xmm3, xmm4);
+ TestBlending(xmm4, xmm5);
+ TestBlending(xmm5, xmm6);
+ TestBlending(xmm6, xmm7);
+ TestBlending(xmm7, xmm8);
+ TestBlending(xmm8, xmm9);
+ TestBlending(xmm9, xmm10);
+ TestBlending(xmm10, xmm11);
+ TestBlending(xmm11, xmm12);
+ TestBlending(xmm12, xmm13);
+ TestBlending(xmm13, xmm14);
+ TestBlending(xmm14, xmm15);
+ TestBlending(xmm15, xmm1);
+
+#undef TestBlending
+#undef TestBlendingXmmAddr
+#undef TestBlendingXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, Cmpps) {
+ // Checks cmpps in its register-register and register-memory forms. The
+ // operator-based helpers compare two identical vectors
+ // (-1.0, 1.0, 3.14, 1024.5) and expect packedAs<float>(V0) Op V1; the
+ // OrdUnord helpers mix 1.0 with quiet NaNs and expect
+ // packedAs<float>(V0).C(V1).
+ //
+ // NOTE(review): TestCmpps only ever passes the eq and unord predicates (eq
+ // is repeated six times per form). Presumably the repeats were meant to
+ // cover other Cond::Cmpps_* values -- confirm and extend.
+
+ // Register-register compare checked against a host-side operator.
+#define TestCmppsXmmXmm(Dst, Src, C, Op) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Src ", " #C ", " #Op ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0(-1.0, 1.0, 3.14, 1024.5); \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1(-1.0, 1.0, 3.14, 1024.5); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ cmpps(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Cond::Cmpps_##C); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<float>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Register-memory compare checked against a host-side operator.
+#define TestCmppsXmmAddr(Dst, C, Op) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0(-1.0, 1.0, 3.14, 1024.5); \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1(-1.0, 1.0, 3.14, 1024.5); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ cmpps(Encoded_Xmm_##Dst(), dwordAddress(T1), Cond::Cmpps_##C); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<float>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Register-register compare on vectors that contain NaNs; the expected
+ // value comes from the host-side member function named after the predicate.
+#define TestCmppsOrdUnordXmmXmm(Dst, Src, C) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Src ", " #C ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
+ std::numeric_limits<float>::quiet_NaN()); \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
+ std::numeric_limits<float>::quiet_NaN()); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ cmpps(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Cond::Cmpps_##C); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<float>(V0).C(V1), test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Register-memory variant of the NaN compare above.
+#define TestCmppsOrdUnordXmmAddr(Dst, C) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", Addr, " #C ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
+ std::numeric_limits<float>::quiet_NaN()); \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
+ std::numeric_limits<float>::quiet_NaN()); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ cmpps(Encoded_Xmm_##Dst(), dwordAddress(T1), Cond::Cmpps_##C); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(packedAs<float>(V0).C(V1), test.Dst<Dqword>()) << TestString; \
+ \
+ reset(); \
+ } while (0)
+
+ // Runs all predicate checks for one (Dst, Src) pair.
+#define TestCmpps(Dst, Src) \
+ do { \
+ TestCmppsXmmXmm(Dst, Src, eq, == ); \
+ TestCmppsXmmAddr(Dst, eq, == ); \
+ TestCmppsXmmXmm(Dst, Src, eq, == ); \
+ TestCmppsXmmAddr(Dst, eq, == ); \
+ TestCmppsXmmXmm(Dst, Src, eq, == ); \
+ TestCmppsXmmAddr(Dst, eq, == ); \
+ TestCmppsOrdUnordXmmXmm(Dst, Src, unord); \
+ TestCmppsOrdUnordXmmAddr(Dst, unord); \
+ TestCmppsXmmXmm(Dst, Src, eq, == ); \
+ TestCmppsXmmAddr(Dst, eq, == ); \
+ TestCmppsXmmXmm(Dst, Src, eq, == ); \
+ TestCmppsXmmAddr(Dst, eq, == ); \
+ TestCmppsXmmXmm(Dst, Src, eq, == ); \
+ TestCmppsXmmAddr(Dst, eq, == ); \
+ TestCmppsOrdUnordXmmXmm(Dst, Src, unord); \
+ TestCmppsOrdUnordXmmAddr(Dst, unord); \
+ } while (0)
+
+ TestCmpps(xmm0, xmm1);
+ TestCmpps(xmm1, xmm2);
+ TestCmpps(xmm2, xmm3);
+ TestCmpps(xmm3, xmm4);
+ TestCmpps(xmm4, xmm5);
+ TestCmpps(xmm5, xmm6);
+ TestCmpps(xmm6, xmm7);
+ TestCmpps(xmm7, xmm8);
+ TestCmpps(xmm8, xmm9);
+ TestCmpps(xmm9, xmm10);
+ TestCmpps(xmm10, xmm11);
+ TestCmpps(xmm11, xmm12);
+ TestCmpps(xmm12, xmm13);
+ TestCmpps(xmm13, xmm14);
+ TestCmpps(xmm14, xmm15);
+ TestCmpps(xmm15, xmm0);
+
+#undef TestCmpps
+#undef TestCmppsOrdUnordXmmAddr
+#undef TestCmppsOrdUnordXmmXmm
+#undef TestCmppsXmmAddr
+#undef TestCmppsXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) { /*
+ * Runs sqrtps, rsqrtps, reciprocalps and sqrtpd in place on each xmm
+ * register and compares the register with a hard-coded 128-bit pattern.
+ * TestImplSingle always loads V0 = (1.0, 4.0, 20.0, 3.14), also for sqrtpd;
+ * Expect is a parenthesized Dqword constructor argument list.
+ */
+#define TestImplSingle(Dst, Inst, Expect) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0(1.0, 4.0, 20.0, 3.14); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(Encoded_Xmm_##Dst()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.run(); \
+ ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ // Applies the four instructions, with their expected bit patterns, to Dst.
+#define TestImpl(Dst) \
+ do { \
+ TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull), \
+ uint64_t(0x3FE2D10B408F1BBDull))); \
+ TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull), \
+ uint64_t(0x3F1078003E64F000ull))); \
+ TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull), \
+ uint64_t(0x3EA310003D4CC000ull))); \
+ \
+ TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull), \
+ uint64_t(0x401C42FAE40282A8ull))); \
+ } while (0)
+
+ TestImpl(xmm0);
+ TestImpl(xmm1);
+ TestImpl(xmm2);
+ TestImpl(xmm3);
+ TestImpl(xmm4);
+ TestImpl(xmm5);
+ TestImpl(xmm6);
+ TestImpl(xmm7);
+ TestImpl(xmm8);
+ TestImpl(xmm9);
+ TestImpl(xmm10);
+ TestImpl(xmm11);
+ TestImpl(xmm12);
+ TestImpl(xmm13);
+ TestImpl(xmm14);
+ TestImpl(xmm15);
+
+#undef TestImpl
+#undef TestImplSingle
+}
+
+TEST_F(AssemblerX8664Test, Movhlps_Movlhps) { /*
+ * Checks movhlps and movlhps for every consecutive pair of xmm registers.
+ * TestImplSingle loads Dst with V0 and Src with V1 (both hard-coded in the
+ * macro), emits Inst(Dst, Src) and compares Dst with Dqword Expect, where
+ * Expect is a parenthesized constructor argument list.
+ */
+#define TestImplSingle(Dst, Src, Inst, Expect) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull), \
+ uint64_t(0xCCCCCCCCDDDDDDDDull)); \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull), \
+ uint64_t(0x9999999988888888ull)); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ /* Expected values, in terms of the two 64-bit constructor arguments:
+ * movhlps -> (second of V1, second of V0);
+ * movlhps -> (first of V0, first of V1). */
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplSingle(Dst, Src, movhlps, (uint64_t(0x9999999988888888ull), \
+ uint64_t(0xCCCCCCCCDDDDDDDDull))); \
+ TestImplSingle(Dst, Src, movlhps, (uint64_t(0xAAAAAAAABBBBBBBBull), \
+ uint64_t(0xEEEEEEEEFFFFFFFFull))); \
+ } while (0)
+
+ TestImpl(xmm0, xmm1);
+ TestImpl(xmm1, xmm2);
+ TestImpl(xmm2, xmm3);
+ TestImpl(xmm3, xmm4);
+ TestImpl(xmm4, xmm5);
+ TestImpl(xmm5, xmm6);
+ TestImpl(xmm6, xmm7);
+ TestImpl(xmm7, xmm8);
+ TestImpl(xmm8, xmm9);
+ TestImpl(xmm9, xmm10);
+ TestImpl(xmm10, xmm11);
+ TestImpl(xmm11, xmm12);
+ TestImpl(xmm12, xmm13);
+ TestImpl(xmm13, xmm14);
+ TestImpl(xmm14, xmm15);
+ TestImpl(xmm15, xmm0);
+
+#undef TestImpl
+#undef TestImplSingle
+}
+
+TEST_F(AssemblerX8664Test, Unpck) { /*
+ * Checks unpcklps, unpcklpd, unpckhps and unpckhpd in their
+ * register-register form for every consecutive pair of xmm registers.
+ * Dst is loaded with V0 and Src with V1.
+ */
+ const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull),
+ uint64_t(0xCCCCCCCCDDDDDDDDull));
+ const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull),
+ uint64_t(0x9999999988888888ull));
+ /* One expected result per instruction, named <Inst>Expected so that
+ * TestImplSingle can select it by token pasting. */
+ const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
+ uint64_t(0xEEEEEEEEAAAAAAAAull));
+ const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
+ uint64_t(0xEEEEEEEEFFFFFFFFull));
+ const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
+ uint64_t(0x99999999CCCCCCCCull));
+ const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
+ uint64_t(0x9999999988888888ull));
+ // Emits Inst(Dst, Src) and compares Dst with the matching <Inst>Expected.
+#define TestImplSingle(Dst, Src, Inst) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ // Runs the four unpack instructions for one (Dst, Src) pair.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplSingle(Dst, Src, unpcklps); \
+ TestImplSingle(Dst, Src, unpcklpd); \
+ TestImplSingle(Dst, Src, unpckhps); \
+ TestImplSingle(Dst, Src, unpckhpd); \
+ } while (0)
+
+ TestImpl(xmm0, xmm1);
+ TestImpl(xmm1, xmm2);
+ TestImpl(xmm2, xmm3);
+ TestImpl(xmm3, xmm4);
+ TestImpl(xmm4, xmm5);
+ TestImpl(xmm5, xmm6);
+ TestImpl(xmm6, xmm7);
+ TestImpl(xmm7, xmm8);
+ TestImpl(xmm8, xmm9);
+ TestImpl(xmm9, xmm10);
+ TestImpl(xmm10, xmm11);
+ TestImpl(xmm11, xmm12);
+ TestImpl(xmm12, xmm13);
+ TestImpl(xmm13, xmm14);
+ TestImpl(xmm14, xmm15);
+ TestImpl(xmm15, xmm0);
+
+#undef TestImpl
+#undef TestImplSingle
+}
+
+TEST_F(AssemblerX8664Test, Shufp) { /*
+ * Checks pshufd and shufps, both emitted with IceType_f32 and an 8-bit
+ * immediate, in register-register and register-memory forms. Dst is loaded
+ * with V0 and the second operand holds V1. The macros find <Inst>Imm and
+ * <Inst>Expected by token pasting.
+ */
+ const Dqword V0(uint64_t(0x1111111122222222ull),
+ uint64_t(0x5555555577777777ull));
+ const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
+ uint64_t(0xCCCCCCCCDDDDDDDDull));
+ // Immediate and expected result used for pshufd.
+ const uint8_t pshufdImm = 0x63;
+ const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
+ uint64_t(0xAAAAAAAADDDDDDDDull));
+ // Immediate and expected result used for shufps.
+ const uint8_t shufpsImm = 0xf9;
+ const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
+ uint64_t(0xCCCCCCCCCCCCCCCCull));
+ // Register-register form.
+#define TestImplSingleXmmXmm(Dst, Src, Inst) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
+ Immediate(Inst##Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ // Register-memory form: the second operand is read from dwordAddress(T1).
+#define TestImplSingleXmmAddr(Dst, Inst) \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \
+ Immediate(Inst##Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ /* Register-register form for an emitter without a type argument.
+ * NOTE(review): TestImpl below never invokes this macro, and no
+ * <Inst>UntypedExpected constant is declared in this test -- confirm
+ * whether it is still needed. */
+#define TestImplSingleXmmXmmUntyped(Dst, Src, Inst) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Src ", " #Inst ", Untyped)"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Immediate(Inst##Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ // Runs both forms of pshufd and shufps for one (Dst, Src) pair.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplSingleXmmXmm(Dst, Src, pshufd); \
+ TestImplSingleXmmAddr(Dst, pshufd); \
+ TestImplSingleXmmXmm(Dst, Src, shufps); \
+ TestImplSingleXmmAddr(Dst, shufps); \
+ } while (0)
+
+ TestImpl(xmm0, xmm1);
+ TestImpl(xmm1, xmm2);
+ TestImpl(xmm2, xmm3);
+ TestImpl(xmm3, xmm4);
+ TestImpl(xmm4, xmm5);
+ TestImpl(xmm5, xmm6);
+ TestImpl(xmm6, xmm7);
+ TestImpl(xmm7, xmm8);
+ TestImpl(xmm8, xmm9);
+ TestImpl(xmm9, xmm10);
+ TestImpl(xmm10, xmm11);
+ TestImpl(xmm11, xmm12);
+ TestImpl(xmm12, xmm13);
+ TestImpl(xmm13, xmm14);
+ TestImpl(xmm14, xmm15);
+ TestImpl(xmm15, xmm0);
+
+#undef TestImpl
+#undef TestImplSingleXmmXmmUntyped
+#undef TestImplSingleXmmAddr
+#undef TestImplSingleXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, Cvt) { /*
+ * Checks the cvtdq2ps, cvttps2dq, cvtsi2ss, cvttss2si and cvtfloat2float
+ * emitters, each with IceType_f32 and IceType_f64. The constants below are
+ * named <Inst><Size>DstValue, <Inst><Size>SrcValue and <Inst><Size>Expected;
+ * the macros rebuild those names by pasting their Inst and Size arguments.
+ */
+ const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
+ const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
+ const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
+
+ const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
+ const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
+ const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
+
+ const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
+ const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
+ const Dqword tps2dq32Expected(-5, 3, 100, 200);
+
+ const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
+ const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
+ const Dqword tps2dq64Expected(-5, 3, 100, 200);
+
+ const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
+ const int32_t si2ss32SrcValue = 5;
+ const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
+
+ const Dqword si2ss64DstValue(-1.0, -1.0);
+ const int32_t si2ss64SrcValue = 5;
+ const Dqword si2ss64Expected(5.0, -1.0);
+
+ const int32_t tss2si32DstValue = 0xF00F0FF0;
+ const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
+ const int32_t tss2si32Expected = -5;
+
+ const int32_t tss2si64DstValue = 0xF00F0FF0;
+ const Dqword tss2si64SrcValue(-5.0, -1.0);
+ const int32_t tss2si64Expected = -5;
+
+ const Dqword float2float32DstValue(-1.0, -1.0);
+ const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
+ const Dqword float2float32Expected(-5.0, -1.0);
+
+ const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
+ const Dqword float2float64SrcValue(-5.0, 3.0);
+ const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
+ /* xmm <- xmm: both registers are loaded from memory, then Dst is compared
+ * as a Dqword. */
+#define TestImplPXmmXmm(Dst, Src, Inst, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, Inst##Size##DstValue); \
+ test.setDqwordTo(T1, Inst##Size##SrcValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ // xmm <- GPR: the GPR is loaded with the source value as an immediate.
+#define TestImplSXmmReg(Dst, GPR, Inst, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
+ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, Inst##Size##DstValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ /* GPR <- xmm: the GPR is pre-loaded with the DstValue immediate and the
+ * result is compared as a uint32_t. */
+#define TestImplSRegXmm(GPR, Src, Inst, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ \
+ __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, Inst##Size##SrcValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+ // xmm <- memory: the source is a Dqword slot addressed via dwordAddress(T1).
+#define TestImplPXmmAddr(Dst, Inst, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, Inst##Size##DstValue); \
+ test.setDqwordTo(T1, Inst##Size##SrcValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ /* xmm <- memory, where the source slot comes from allocateDword and is
+ * written with setDwordTo. */
+#define TestImplSXmmAddr(Dst, Inst, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const uint32_t T1 = allocateDword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, Inst##Size##DstValue); \
+ test.setDwordTo(T1, Inst##Size##SrcValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+ // GPR <- memory: the result is compared as a uint32_t.
+#define TestImplSRegAddr(GPR, Inst, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ \
+ __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
+ __ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, Inst##Size##SrcValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+ /* Runs every conversion for one operand size. The TestImplPXmmAddr calls
+ * pass Src as the destination register.
+ * NOTE(review): presumably intentional, to vary the register -- confirm. */
+#define TestImplSize(Dst, Src, GPR, Size) \
+ do { \
+ TestImplPXmmXmm(Dst, Src, dq2ps, Size); \
+ TestImplPXmmAddr(Src, dq2ps, Size); \
+ TestImplPXmmXmm(Dst, Src, tps2dq, Size); \
+ TestImplPXmmAddr(Src, tps2dq, Size); \
+ TestImplSXmmReg(Dst, GPR, si2ss, Size); \
+ TestImplSXmmAddr(Dst, si2ss, Size); \
+ TestImplSRegXmm(GPR, Src, tss2si, Size); \
+ TestImplSRegAddr(GPR, tss2si, Size); \
+ TestImplPXmmXmm(Dst, Src, float2float, Size); \
+ TestImplPXmmAddr(Src, float2float, Size); \
+ } while (0)
+ // Runs TestImplSize for both the 32 and the 64 size suffix.
+#define TestImpl(Dst, Src, GPR) \
+ do { \
+ TestImplSize(Dst, Src, GPR, 32); \
+ TestImplSize(Dst, Src, GPR, 64); \
+ } while (0)
+ /* NOTE(review): r9 never appears in the GPR column below; presumably it is
+ * reserved by the test fixture -- confirm. */
+ TestImpl(xmm0, xmm1, r1);
+ TestImpl(xmm1, xmm2, r2);
+ TestImpl(xmm2, xmm3, r3);
+ TestImpl(xmm3, xmm4, r4);
+ TestImpl(xmm4, xmm5, r5);
+ TestImpl(xmm5, xmm6, r6);
+ TestImpl(xmm6, xmm7, r7);
+ TestImpl(xmm7, xmm8, r8);
+ TestImpl(xmm8, xmm9, r10);
+ TestImpl(xmm9, xmm10, r11);
+ TestImpl(xmm10, xmm11, r12);
+ TestImpl(xmm11, xmm12, r13);
+ TestImpl(xmm12, xmm13, r14);
+ TestImpl(xmm13, xmm14, r15);
+ TestImpl(xmm14, xmm15, r1);
+ TestImpl(xmm15, xmm0, r2);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplSRegAddr
+#undef TestImplSXmmAddr
+#undef TestImplPXmmAddr
+#undef TestImplSRegXmm
+#undef TestImplSXmmReg
+#undef TestImplPXmmXmm
+}
+
+// Checks ucomiss on f32 and f64 scalars, in both the xmm/xmm and the
+// xmm/memory form. Every case loads eax with ImmIfFalse, runs the comparison,
+// and then emits two conditional jumps (Br_<BParity> and Br_<BOther>) that skip
+// the store of ImmIfTrue. A case passes only when neither jump is taken, i.e.
+// when eax ends up holding ImmIfTrue.
+TEST_F(AssemblerX8664Test, Ucomiss) {
+ static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
+ // Taken from the double specialization so the 64-bit NaN is not produced by
+ // converting a float NaN.
+ static constexpr double qnan64 = std::numeric_limits<double>::quiet_NaN();
+
+ // Element 0 of these values is overwritten by every case; the other elements
+ // keep the NaN they are constructed with here.
+ Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
+ Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
+
+ Dqword test64DstValue(0.0, qnan64);
+ Dqword test64SrcValue(0.0, qnan64);
+
+// Register/register form: both operands are loaded into xmm registers before
+// ucomiss runs. CompType only appears in the failure message.
+#define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, \
+ BOther) \
+ do { \
+ static constexpr char NearBranch = AssemblerX8664::kNearJump; \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
+ ", " #BParity ", " #BOther ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ test##Size##DstValue.F##Size[0] = Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ test##Size##SrcValue.F##Size[0] = Value1; \
+ const uint32_t ImmIfTrue = 0xBEEF; \
+ const uint32_t ImmIfFalse = 0xC0FFE; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \
+ __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ Label Done; \
+ __ j(Cond::Br_##BParity, &Done, NearBranch); \
+ __ j(Cond::Br_##BOther, &Done, NearBranch); \
+ __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \
+ __ bind(&Done); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, test##Size##DstValue); \
+ test.setDqwordTo(T1, test##Size##SrcValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Register/memory form: the second operand is read directly from the dqword
+// slot T1 instead of from an xmm register.
+#define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther) \
+ do { \
+ static constexpr char NearBranch = AssemblerX8664::kNearJump; \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType \
+ ", " #BParity ", " #BOther ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ test##Size##DstValue.F##Size[0] = Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ test##Size##SrcValue.F##Size[0] = Value1; \
+ const uint32_t ImmIfTrue = 0xBEEF; \
+ const uint32_t ImmIfFalse = 0xC0FFE; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \
+ __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ Label Done; \
+ __ j(Cond::Br_##BParity, &Done, NearBranch); \
+ __ j(Cond::Br_##BOther, &Done, NearBranch); \
+ __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \
+ __ bind(&Done); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, test##Size##DstValue); \
+ test.setDqwordTo(T1, test##Size##SrcValue); \
+ test.run(); \
+ \
+ ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Runs one comparison in both operand forms.
+#define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity, \
+ BOther) \
+ do { \
+ TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
+ TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther); \
+ } while (0)
+
+// The comparison matrix for one operand size. The last two arguments name the
+// branch conditions that must NOT fire for the given operands.
+// NOTE(review): the first unordered case passes `o` as BOther while the other
+// two pass `s` -- confirm whether the difference is intentional.
+#define TestImplSize(Dst, Src, Size) \
+ do { \
+ TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne); \
+ TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e); \
+ TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a); \
+ TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a); \
+ TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae); \
+ TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b); \
+ TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b); \
+ TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be); \
+ TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o); \
+ TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s); \
+ TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s); \
+ } while (0)
+
+// Runs the matrix for both f32 and f64.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplSize(Dst, Src, 32); \
+ TestImplSize(Dst, Src, 64); \
+ } while (0)
+
+ TestImpl(xmm0, xmm1);
+ TestImpl(xmm1, xmm2);
+ TestImpl(xmm2, xmm3);
+ TestImpl(xmm3, xmm4);
+ TestImpl(xmm4, xmm5);
+ TestImpl(xmm5, xmm6);
+ TestImpl(xmm6, xmm7);
+ TestImpl(xmm7, xmm8);
+ TestImpl(xmm8, xmm9);
+ TestImpl(xmm9, xmm10);
+ TestImpl(xmm10, xmm11);
+ TestImpl(xmm11, xmm12);
+ TestImpl(xmm12, xmm13);
+ TestImpl(xmm13, xmm14);
+ TestImpl(xmm14, xmm15);
+ TestImpl(xmm15, xmm0);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplCond
+#undef TestImplXmmAddr
+#undef TestImplXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, Movmsk) { // Runs movmskps and movmskpd with an xmm source and a GPR destination and checks the value left in the GPR.
+#define TestMovmskGPRXmm(GPR, Src, Value1, Expected, Inst) /* One case: load Src from the dqword slot T0, run Inst, compare the GPR with Expected. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #GPR ", " #Src ", " #Value1 ", " #Expected ", " #Inst ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value1; /* Value1 is passed as a parenthesised argument list, so this declares V0(...). */ \
+ \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ Inst(Encoded_GPR_##GPR(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.GPR()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestMovmsk(GPR, Src) /* Runs the four-float (movmskps) and the two-double (movmskpd) case for one register pair. */ \
+ do { \
+ TestMovmskGPRXmm(GPR, Src, (-1.0, 1.0, -1.0, 1.0), 0x05ul, movmskps); \
+ TestMovmskGPRXmm(GPR, Src, (1.0, -1.0), 0x02ul, movmskpd); \
+ } while (0)
+
+ TestMovmsk(r1, xmm0); // The GPR list below goes from r8 straight to r10 and wraps back to r1/r2 for xmm14/xmm15.
+ TestMovmsk(r2, xmm1);
+ TestMovmsk(r3, xmm2);
+ TestMovmsk(r4, xmm3);
+ TestMovmsk(r5, xmm4);
+ TestMovmsk(r6, xmm5);
+ TestMovmsk(r7, xmm6);
+ TestMovmsk(r8, xmm7);
+ TestMovmsk(r10, xmm8);
+ TestMovmsk(r11, xmm9);
+ TestMovmsk(r12, xmm10);
+ TestMovmsk(r13, xmm11);
+ TestMovmsk(r14, xmm12);
+ TestMovmsk(r15, xmm13);
+ TestMovmsk(r1, xmm14);
+ TestMovmsk(r2, xmm15);
+
+#undef TestMovmskGPRXmm
+#undef TestMovmsk
+}
+
+TEST_F(AssemblerX8664Test, Sqrtss) { // Runs sqrtss for f32 and f64 with an xmm or memory source and checks that only element 0 of the destination changes.
+ Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0); // Element 0 is overwritten with the input of each case.
+ Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0); // Initial destination contents; also the base of the expected value.
+
+ Dqword test64SrcValue(-100.0, -100.0);
+ Dqword test64DstValue(-1.0, -1.0);
+
+#define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size) /* Register source: Src is loaded from T0, Dst from T1. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ test##Size##SrcValue.F##Size[0] = Value1; \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, test##Size##SrcValue); \
+ test.setDqwordTo(T1, test##Size##DstValue); \
+ test.run(); \
+ \
+ Dqword Expected = test##Size##DstValue; /* Start from the initial destination ... */ \
+ Expected.F##Size[0] = Result; /* ... and replace only element 0 with the expected root. */ \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestSqrtssXmmAddr(Dst, Value1, Result, Size) /* Memory source: the operand is read straight from the dqword slot T0. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ test##Size##SrcValue.F##Size[0] = Value1; \
+ const uint32_t T1 = allocateDqword(); \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, test##Size##SrcValue); \
+ test.setDqwordTo(T1, test##Size##DstValue); \
+ test.run(); \
+ \
+ Dqword Expected = test##Size##DstValue; \
+ Expected.F##Size[0] = Result; \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestSqrtssSize(Dst, Src, Size) /* Three input/result pairs, each run in both operand forms. */ \
+ do { \
+ TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size); \
+ TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size); \
+ TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size); \
+ TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size); \
+ TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size); \
+ TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size); \
+ } while (0)
+
+#define TestSqrtss(Dst, Src) /* Runs the cases for both f32 and f64. */ \
+ do { \
+ TestSqrtssSize(Dst, Src, 32); \
+ TestSqrtssSize(Dst, Src, 64); \
+ } while (0)
+
+ TestSqrtss(xmm0, xmm1);
+ TestSqrtss(xmm1, xmm2);
+ TestSqrtss(xmm2, xmm3);
+ TestSqrtss(xmm3, xmm4);
+ TestSqrtss(xmm4, xmm5);
+ TestSqrtss(xmm5, xmm6);
+ TestSqrtss(xmm6, xmm7);
+ TestSqrtss(xmm7, xmm8);
+ TestSqrtss(xmm8, xmm9);
+ TestSqrtss(xmm9, xmm10);
+ TestSqrtss(xmm10, xmm11);
+ TestSqrtss(xmm11, xmm12);
+ TestSqrtss(xmm12, xmm13);
+ TestSqrtss(xmm13, xmm14);
+ TestSqrtss(xmm14, xmm15);
+ TestSqrtss(xmm15, xmm0);
+
+#undef TestSqrtss
+#undef TestSqrtssSize
+#undef TestSqrtssXmmAddr
+#undef TestSqrtssXmmXmm
+}
+
+// Checks insertps with an xmm source and with a memory source. Value0, Value1
+// and Expected are passed as parenthesised constructor-argument lists for
+// Dqword, so `const Dqword V0 Value0;` expands to `const Dqword V0(...)`.
+TEST_F(AssemblerX8664Test, Insertps) {
+// Register source: Dst is loaded from T0 and Src from T1 before insertps runs.
+#define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected \
+ ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
+ Immediate(Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Memory source: the inserted element is read from the slot T1.
+#define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \
+ Immediate(Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Two immediates (0x99 and 0x9D), each run in both operand forms. For the same
+// immediate the register form expects 0xDDDDDDDD to be inserted while the
+// memory form expects 0xBBBBBBBB -- presumably because the memory form reads a
+// single 32-bit element and ignores the source-select bits of the immediate
+// (see the INSERTPS entry of the Intel SDM).
+#define TestInsertps(Dst, Src) \
+ do { \
+ TestInsertpsXmmXmmImm( \
+ Dst, (uint64_t(-1), uint64_t(-1)), Src, \
+ (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
+ 0x99, \
+ (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull))); \
+ TestInsertpsXmmAddrImm( \
+ Dst, (uint64_t(-1), uint64_t(-1)), \
+ (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
+ 0x99, \
+ (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull))); \
+ TestInsertpsXmmXmmImm( \
+ Dst, (uint64_t(-1), uint64_t(-1)), Src, \
+ (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
+ 0x9D, \
+ (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull))); \
+ TestInsertpsXmmAddrImm( \
+ Dst, (uint64_t(-1), uint64_t(-1)), \
+ (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
+ 0x9D, \
+ (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull))); \
+ } while (0)
+
+ TestInsertps(xmm0, xmm1);
+ TestInsertps(xmm1, xmm2);
+ TestInsertps(xmm2, xmm3);
+ TestInsertps(xmm3, xmm4);
+ TestInsertps(xmm4, xmm5);
+ TestInsertps(xmm5, xmm6);
+ TestInsertps(xmm6, xmm7);
+ TestInsertps(xmm7, xmm8);
+ TestInsertps(xmm8, xmm9);
+ TestInsertps(xmm9, xmm10);
+ TestInsertps(xmm10, xmm11);
+ TestInsertps(xmm11, xmm12);
+ TestInsertps(xmm12, xmm13);
+ TestInsertps(xmm13, xmm14);
+ TestInsertps(xmm14, xmm15);
+ TestInsertps(xmm15, xmm0);
+
+// Undefine every helper defined above so none of them leaks into later tests.
+#undef TestInsertps
+#undef TestInsertpsXmmAddrImm
+#undef TestInsertpsXmmXmmImm
+}
+
+// Checks pinsr for 8-, 16- and 32-bit elements, taking the inserted value from
+// a GPR and from memory. The expected result is the initial destination with
+// element (Imm & Mask<Size>) of its U<Size> view replaced by Value1.
+TEST_F(AssemblerX8664Test, Pinsr) {
+ // Masks applied to the immediate to obtain the element index per size.
+ static constexpr uint8_t Mask32 = 0x03;
+ static constexpr uint8_t Mask16 = 0x07;
+ static constexpr uint8_t Mask8 = 0x0F;
+
+// GPR source: Value1 is moved into the GPR as an i32 immediate first. Value0 is
+// a parenthesised Dqword constructor-argument list.
+#define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Value1)); \
+ __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR(), \
+ Immediate(Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.run(); \
+ \
+ constexpr uint8_t sel = (Imm)&Mask##Size; \
+ Dqword Expected = V0; \
+ Expected.U##Size[sel] = Value1; \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Memory source: Value1 is stored in the dword slot T1 and read from there.
+#define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDword(); \
+ const uint32_t V1 = Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1), \
+ Immediate(Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDwordTo(T1, V1); \
+ test.run(); \
+ \
+ constexpr uint8_t sel = (Imm)&Mask##Size; \
+ Dqword Expected = V0; \
+ Expected.U##Size[sel] = Value1; \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Runs both operand forms against the same initial destination value.
+#define TestPinsrSize(Dst, GPR, Value1, Imm, Size) \
+ do { \
+ TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \
+ uint64_t(0xFFFFFFFFDDDDDDDDull)), \
+ GPR, Value1, Imm, Size); \
+ TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \
+ uint64_t(0xFFFFFFFFDDDDDDDDull)), \
+ Value1, Imm, Size); \
+ } while (0)
+
+// Dst is the xmm destination and GPR the register supplying the inserted
+// value; the parameter names follow the order the helpers above expect.
+#define TestPinsr(Dst, GPR) \
+ do { \
+ TestPinsrSize(Dst, GPR, 0xEE, 0x03, 8); \
+ TestPinsrSize(Dst, GPR, 0xFFEE, 0x03, 16); \
+ TestPinsrSize(Dst, GPR, 0xC0FFEE, 0x03, 32); \
+ } while (0)
+
+ TestPinsr(xmm0, r1);
+ TestPinsr(xmm1, r2);
+ TestPinsr(xmm2, r3);
+ TestPinsr(xmm3, r4);
+ TestPinsr(xmm4, r5);
+ TestPinsr(xmm5, r6);
+ TestPinsr(xmm6, r7);
+ TestPinsr(xmm7, r8);
+ TestPinsr(xmm8, r10);
+ TestPinsr(xmm9, r11);
+ TestPinsr(xmm10, r12);
+ TestPinsr(xmm11, r13);
+ TestPinsr(xmm12, r14);
+ TestPinsr(xmm13, r15);
+ TestPinsr(xmm14, r1);
+ TestPinsr(xmm15, r2);
+
+#undef TestPinsr
+#undef TestPinsrSize
+#undef TestPinsrXmmAddrImm
+#undef TestPinsrXmmGPRImm
+}
+
+// Checks pextr for 8-, 16- and 32-bit elements with an xmm source and a GPR
+// destination. The GPR must end up holding element (Imm & Mask<Size>) of the
+// source's U<Size> view.
+TEST_F(AssemblerX8664Test, Pextr) {
+ // Masks applied to the immediate to obtain the element index per size.
+ static constexpr uint8_t Mask32 = 0x03;
+ static constexpr uint8_t Mask16 = 0x07;
+ static constexpr uint8_t Mask8 = 0x0F;
+
+// One case. Value1 is a parenthesised Dqword constructor-argument list holding
+// the contents loaded into Src.
+#define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ pextr(IceType_i##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src(), \
+ Immediate(Imm)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.run(); \
+ \
+ constexpr uint8_t sel = (Imm)&Mask##Size; \
+ ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Supplies the fixed source contents. Value1 is accepted but not used: the
+// expected value is read back from the source contents instead.
+#define TestPextrSize(GPR, Src, Value1, Imm, Size) \
+ do { \
+ TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull), \
+ uint64_t(0xFFFFFFFFDDDDDDDDull)), \
+ Imm, Size); \
+ } while (0)
+
+// GPR is the destination register and Src the xmm source; the parameter names
+// follow the order the helpers above expect.
+#define TestPextr(GPR, Src) \
+ do { \
+ TestPextrSize(GPR, Src, 0xEE, 0x03, 8); \
+ TestPextrSize(GPR, Src, 0xFFEE, 0x03, 16); \
+ TestPextrSize(GPR, Src, 0xC0FFEE, 0x03, 32); \
+ } while (0)
+
+ TestPextr(r1, xmm0);
+ TestPextr(r2, xmm1);
+ TestPextr(r3, xmm2);
+ TestPextr(r4, xmm3);
+ TestPextr(r5, xmm4);
+ TestPextr(r6, xmm5);
+ TestPextr(r7, xmm6);
+ TestPextr(r8, xmm7);
+ TestPextr(r10, xmm8);
+ TestPextr(r11, xmm9);
+ TestPextr(r12, xmm10);
+ TestPextr(r13, xmm11);
+ TestPextr(r14, xmm12);
+ TestPextr(r15, xmm13);
+ TestPextr(r1, xmm14);
+ TestPextr(r2, xmm15);
+
+// Undefine every helper defined above so none of them leaks into later tests.
+#undef TestPextr
+#undef TestPextrSize
+#undef TestPextrGPRXmmImm
+}
+
+TEST_F(AssemblerX8664Test, Pmovsxdq) { // Runs pmovsxdq between xmm registers and compares the destination with the two low I32 elements of the source widened to 64 bits.
+#define TestPmovsxdqXmmXmm(Dst, Src, Value1) /* One case; Value1 is a parenthesised Dqword constructor-argument list for the source. */ \
+ do { \
+ static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Value1 ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value1; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1(uint64_t(0), uint64_t(0)); /* Destination starts zeroed. */ \
+ \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ __ pmovsxdq(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ const Dqword Expected(uint64_t(V0.I32[0]), uint64_t(V0.I32[1])); /* Only elements 0 and 1 of the I32 view contribute; the upper source qword is ignored. */ \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestPmovsxdq(Dst, Src) /* Four low-qword patterns covering both values of the top bit in each 32-bit half. */ \
+ do { \
+ TestPmovsxdqXmmXmm(Dst, Src, (uint64_t(0x700000007FFFFFFFull), \
+ uint64_t(0xAAAAAAAAEEEEEEEEull))); \
+ TestPmovsxdqXmmXmm(Dst, Src, (uint64_t(0x800000007FFFFFFFull), \
+ uint64_t(0xAAAAAAAAEEEEEEEEull))); \
+ TestPmovsxdqXmmXmm(Dst, Src, (uint64_t(0x70000000FFFFFFFFull), \
+ uint64_t(0xAAAAAAAAEEEEEEEEull))); \
+ TestPmovsxdqXmmXmm(Dst, Src, (uint64_t(0x80000000FFFFFFFFull), \
+ uint64_t(0xAAAAAAAAEEEEEEEEull))); \
+ } while (0)
+
+ TestPmovsxdq(xmm0, xmm1);
+ TestPmovsxdq(xmm1, xmm2);
+ TestPmovsxdq(xmm2, xmm3);
+ TestPmovsxdq(xmm3, xmm4);
+ TestPmovsxdq(xmm4, xmm5);
+ TestPmovsxdq(xmm5, xmm6);
+ TestPmovsxdq(xmm6, xmm7);
+ TestPmovsxdq(xmm7, xmm8);
+ TestPmovsxdq(xmm8, xmm9);
+ TestPmovsxdq(xmm9, xmm10);
+ TestPmovsxdq(xmm10, xmm11);
+ TestPmovsxdq(xmm11, xmm12);
+ TestPmovsxdq(xmm12, xmm13);
+ TestPmovsxdq(xmm13, xmm14);
+ TestPmovsxdq(xmm14, xmm15);
+ TestPmovsxdq(xmm15, xmm0);
+
+#undef TestPmovsxdq
+#undef TestPmovsxdqXmmXmm
+}
+
+// Checks pcmpeq and pcmpgt for 8-, 16- and 32-bit elements, with an xmm source
+// and with a memory source. The expected destination is computed element by
+// element on the I<Size> view: -1 where `V1[i] Op V0[i]` holds, 0 otherwise,
+// where V0 is the initial destination and V1 the source.
+TEST_F(AssemblerX8664Test, Pcmpeq_Pcmpgt) {
+// Register source. Value0/Value1 are parenthesised Dqword constructor-argument
+// lists; Inst is the assembler method and Op the C++ operator used to build the
+// expected value.
+#define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ Dqword Expected(uint64_t(0), uint64_t(0)); \
+ static constexpr uint8_t ArraySize = \
+ sizeof(Dqword) / sizeof(uint##Size##_t); \
+ for (uint8_t i = 0; i < ArraySize; ++i) { \
+ Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \
+ } \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// Memory source: the second operand is read from the dqword slot T1.
+#define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0 Value0; \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1 Value1; \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ Dqword Expected(uint64_t(0), uint64_t(0)); \
+ static constexpr uint8_t ArraySize = \
+ sizeof(Dqword) / sizeof(uint##Size##_t); \
+ for (uint8_t i = 0; i < ArraySize; ++i) { \
+ Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \
+ } \
+ ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
+ reset(); \
+ } while (0)
+
+// pcmpeq is modelled with `==`; pcmpgt with `<` applied as source < destination.
+#define TestPcmpValues(Dst, Value0, Src, Value1, Size) \
+ do { \
+ TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == ); \
+ TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == ); \
+ TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < ); \
+ TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < ); \
+ } while (0)
+
+// Two destination/source value pairs per element size.
+#define TestPcmpSize(Dst, Src, Size) \
+ do { \
+ TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull), \
+ uint64_t(0x0000000000000000ull)), \
+ Src, (uint64_t(0x0000008800008800ull), \
+ uint64_t(0xFFFFFFFFFFFFFFFFull)), \
+ Size); \
+ TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull), \
+ uint64_t(0x12345abcde12345Aull)), \
+ Src, (uint64_t(0x0000008800008800ull), \
+ uint64_t(0xAABBCCDD1234321Aull)), \
+ Size); \
+ } while (0)
+
+// Forwards the requested register pair so each invocation below exercises its
+// own registers rather than a fixed xmm0/xmm1 pair.
+#define TestPcmp(Dst, Src) \
+ do { \
+ TestPcmpSize(Dst, Src, 8); \
+ TestPcmpSize(Dst, Src, 16); \
+ TestPcmpSize(Dst, Src, 32); \
+ } while (0)
+
+ TestPcmp(xmm0, xmm1);
+ TestPcmp(xmm1, xmm2);
+ TestPcmp(xmm2, xmm3);
+ TestPcmp(xmm3, xmm4);
+ TestPcmp(xmm4, xmm5);
+ TestPcmp(xmm5, xmm6);
+ TestPcmp(xmm6, xmm7);
+ TestPcmp(xmm7, xmm8);
+ TestPcmp(xmm8, xmm9);
+ TestPcmp(xmm9, xmm10);
+ TestPcmp(xmm10, xmm11);
+ TestPcmp(xmm11, xmm12);
+ TestPcmp(xmm12, xmm13);
+ TestPcmp(xmm13, xmm14);
+ TestPcmp(xmm14, xmm15);
+ TestPcmp(xmm15, xmm0);
+
+#undef TestPcmp
+#undef TestPcmpSize
+#undef TestPcmpValues
+#undef TestPcmpXmmAddr
+#undef TestPcmpXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, Roundsd) { // Runs roundsd between xmm registers for each rounding-mode constant and checks element 0 of the destination.
+#define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN) /* One case: Mode selects AssemblerX8664::k<Mode>, Input is the source scalar, RN the expected rounded value. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")"; \
+ const uint32_t T0 = allocateDqword(); \
+ const Dqword V0(-3.0, -3.0); /* Initial destination contents. */ \
+ const uint32_t T1 = allocateDqword(); \
+ const Dqword V1(double(Input), -123.4); /* Source: Input in element 0, a sentinel in element 1. */ \
+ \
+ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
+ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
+ __ roundsd(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
+ AssemblerX8664::k##Mode); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDqwordTo(T0, V0); \
+ test.setDqwordTo(T1, V1); \
+ test.run(); \
+ \
+ const Dqword Expected(double(RN), -3.0); /* Element 1 must keep the destination's original -3.0. */ \
+ EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString; /* NOTE(review): the other tests in this part of the file use ASSERT_EQ -- confirm EXPECT_EQ is intended. */ \
+ reset(); \
+ } while (0)
+
+#define TestRoundsd(Dst, Src) /* Inputs on either side of x.5 for each rounding mode. */ \
+ do { \
+ TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6); \
+ TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5); \
+ TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5); \
+ TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6); \
+ TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5); \
+ TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5); \
+ } while (0)
+
+ TestRoundsd(xmm0, xmm1);
+ TestRoundsd(xmm1, xmm2);
+ TestRoundsd(xmm2, xmm3);
+ TestRoundsd(xmm3, xmm4);
+ TestRoundsd(xmm4, xmm5);
+ TestRoundsd(xmm5, xmm6);
+ TestRoundsd(xmm6, xmm7);
+ TestRoundsd(xmm7, xmm8);
+ TestRoundsd(xmm8, xmm9);
+ TestRoundsd(xmm9, xmm10);
+ TestRoundsd(xmm10, xmm11);
+ TestRoundsd(xmm11, xmm12);
+ TestRoundsd(xmm12, xmm13);
+ TestRoundsd(xmm13, xmm14);
+ TestRoundsd(xmm14, xmm15);
+ TestRoundsd(xmm15, xmm0);
+
+#undef TestRoundsd
+#undef TestRoundsdXmmXmm
+}
+
+TEST_F(AssemblerX8664Test, Test) { // Runs the test instruction for 8/16/32-bit operands in reg/reg, reg/imm, mem/reg and mem/imm form and observes the outcome through a Br_e jump.
+ static constexpr uint32_t Mask8 = 0xFF; // Mask<Size> truncates the 32-bit test values to the operand size when computing the expectation.
+ static constexpr uint32_t Mask16 = 0xFFFF;
+ static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+#define TestImplRegReg(Dst, Value0, Src, Value1, Size) /* Both operands in GPRs; the outcome is written back to Dst. */ \
+ do { \
+ static constexpr bool NearJump = true; \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ")"; \
+ static constexpr uint32_t ValueIfTrue = 0xBEEFFEEB; \
+ static constexpr uint32_t ValueIfFalse = 0x11111111; \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), Immediate(Value0)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), Immediate(Value1)); \
+ __ test(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_##Src()); \
+ __ mov(IceType_i32, Encoded_GPR_##Dst(), Immediate(ValueIfFalse)); /* Emitted between test and the jump, so the jump still observes the flags from test. */ \
+ Label Done; \
+ __ j(Cond::Br_e, &Done, NearJump); /* When taken, the ValueIfTrue store is skipped. */ \
+ __ mov(IceType_i32, Encoded_GPR_##Dst(), Immediate(ValueIfTrue)); \
+ __ bind(&Done); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(((Value0)&Mask##Size) & ((Value1)&Mask##Size) ? ValueIfTrue \
+ : ValueIfFalse, \
+ test.Dst()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplRegImm(Dst, Value0, Imm, Size) /* GPR against an immediate that is pre-masked to the operand size. */ \
+ do { \
+ static constexpr bool NearJump = true; \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Imm ", " #Size ")"; \
+ static constexpr uint32_t ValueIfTrue = 0xBEEFFEEB; \
+ static constexpr uint32_t ValueIfFalse = 0x11111111; \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), Immediate(Value0)); \
+ __ test(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Imm)&Mask##Size)); \
+ __ mov(IceType_i32, Encoded_GPR_##Dst(), Immediate(ValueIfFalse)); \
+ Label Done; \
+ __ j(Cond::Br_e, &Done, NearJump); \
+ __ mov(IceType_i32, Encoded_GPR_##Dst(), Immediate(ValueIfTrue)); \
+ __ bind(&Done); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(((Value0)&Mask##Size) & ((Imm)&Mask##Size) ? ValueIfTrue \
+ : ValueIfFalse, \
+ test.Dst()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplAddrReg(Value0, Src, Value1, Size) /* Memory dword T0 against a GPR; the outcome is written back to T0. */ \
+ do { \
+ static constexpr bool NearJump = true; \
+ static constexpr char TestString[] = \
+ "(Addr, " #Value0 ", " #Src ", " #Value1 ", " #Size ")"; \
+ static constexpr uint32_t ValueIfTrue = 0xBEEFFEEB; \
+ static constexpr uint32_t ValueIfFalse = 0x11111111; \
+ const uint32_t T0 = allocateDword(); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), Immediate(Value1)); \
+ __ test(IceType_i##Size, dwordAddress(T0), Encoded_GPR_##Src()); \
+ __ mov(IceType_i32, dwordAddress(T0), Immediate(ValueIfFalse)); \
+ Label Done; \
+ __ j(Cond::Br_e, &Done, NearJump); \
+ __ mov(IceType_i32, dwordAddress(T0), Immediate(ValueIfTrue)); \
+ __ bind(&Done); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, uint32_t(Value0)); \
+ test.run(); \
+ \
+ ASSERT_EQ(((Value0)&Mask##Size) & ((Value1)&Mask##Size) ? ValueIfTrue \
+ : ValueIfFalse, \
+ test.contentsOfDword(T0)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplAddrImm(Value0, Value1, Size) /* Memory dword T0 against an immediate pre-masked to the operand size. */ \
+ do { \
+ static constexpr bool NearJump = true; \
+ static constexpr char TestString[] = \
+ "(Addr, " #Value0 ", " #Value1 ", " #Size ")"; \
+ static constexpr uint32_t ValueIfTrue = 0xBEEFFEEB; \
+ static constexpr uint32_t ValueIfFalse = 0x11111111; \
+ const uint32_t T0 = allocateDword(); \
+ \
+ __ test(IceType_i##Size, dwordAddress(T0), \
+ Immediate((Value1)&Mask##Size)); \
+ __ mov(IceType_i32, dwordAddress(T0), Immediate(ValueIfFalse)); \
+ Label Done; \
+ __ j(Cond::Br_e, &Done, NearJump); \
+ __ mov(IceType_i32, dwordAddress(T0), Immediate(ValueIfTrue)); \
+ __ bind(&Done); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, uint32_t(Value0)); \
+ test.run(); \
+ \
+ ASSERT_EQ(((Value0)&Mask##Size) & ((Value1)&Mask##Size) ? ValueIfTrue \
+ : ValueIfFalse, \
+ test.contentsOfDword(T0)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplValues(Dst, Value0, Src, Value1, Size) /* Runs one value pair through all four operand forms. */ \
+ do { \
+ TestImplRegReg(Dst, Value0, Src, Value1, Size); \
+ TestImplRegImm(Dst, Value0, Value1, Size); \
+ TestImplAddrReg(Value0, Src, Value1, Size); \
+ TestImplAddrImm(Value0, Value1, Size); \
+ } while (0)
+
+#define TestImplSize(Dst, Src, Size) /* Three value pairs per operand size. */ \
+ do { \
+ TestImplValues(Dst, 0xF0F12101, Src, 0x00000000, Size); \
+ TestImplValues(Dst, 0xF0000000, Src, 0xF0000000, Size); \
+ TestImplValues(Dst, 0x0F00000F, Src, 0xF00000F0, Size); \
+ } while (0)
+
+#define TestImpl(Dst, Src) /* Runs the value pairs for 8-, 16- and 32-bit operands. */ \
+ do { \
+ TestImplSize(Dst, Src, 8); \
+ TestImplSize(Dst, Src, 16); \
+ TestImplSize(Dst, Src, 32); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplValues
+#undef TestImplAddrImm
+#undef TestImplAddrReg
+#undef TestImplRegImm
+#undef TestImplRegReg
+}
+
+// No mul/div because x86.
+// No shift because x86.
+TEST_F(AssemblerX8664Test, Arith_most) { // Runs And/Or/Xor/add/sub for 8/16/32-bit operands in five operand forms and compares against the same operator evaluated in C++.
+ static constexpr uint32_t Mask8 = 0xFF; // Mask<Size> truncates inputs and results to the operand size.
+ static constexpr uint32_t Mask16 = 0xFFFF;
+ static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+#define TestImplRegReg(Inst, Dst, Value0, Src, Value1, Type, Size, Op) /* Dst <- Dst Op Src, both in GPRs. Type is `int` or `uint` and selects the C++ type used for the expectation. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Value0 ", " #Src ", " #Value1 \
+ ", " #Type #Size "_t, " #Op ")"; \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), Immediate(Value0)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), Immediate(Value1)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(Mask##Size &static_cast<uint32_t>( \
+ static_cast<Type##Size##_t>((Value0)&Mask##Size) \
+ Op static_cast<Type##Size##_t>((Value1)&Mask##Size)), \
+ Mask##Size &test.Dst()) /* Only the low Size bits of the register are compared. */ \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplRegAddr(Inst, Dst, Value0, Value1, Type, Size, Op) /* Dst <- Dst Op [T0]. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Value0 ", Addr, " #Value1 ", " #Type #Size \
+ "_t, " #Op ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = Value1; \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), Immediate(Value0)); \
+ __ mov(IceType_i##Size, dwordAddress(T0), Immediate(Value1)); /* The memory operand is also initialised through setDwordTo below with the same Value1. */ \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), dwordAddress(T0)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(Mask##Size &static_cast<uint32_t>( \
+ static_cast<Type##Size##_t>((Value0)&Mask##Size) \
+ Op static_cast<Type##Size##_t>((Value1)&Mask##Size)), \
+ Mask##Size &test.Dst()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplRegImm(Inst, Dst, Value0, Imm, Type, Size, Op) /* Dst <- Dst Op Imm, with Imm pre-masked to the operand size. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Value0 ", Imm(" #Imm "), " #Type #Size \
+ "_t, " #Op ")"; \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), Immediate(Value0)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Imm)&Mask##Size)); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(Mask##Size &static_cast<uint32_t>( \
+ static_cast<Type##Size##_t>((Value0)&Mask##Size) \
+ Op static_cast<Type##Size##_t>((Imm)&Mask##Size)), \
+ Mask##Size &test.Dst()) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplAddrReg(Inst, Value0, Src, Value1, Type, Size, Op) /* [T0] <- [T0] Op Src; the result is read back from memory. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", Addr, " #Value0 ", " #Src ", " #Value1 ", " #Type #Size \
+ "_t, " #Op ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = Value0; \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), Immediate(Value1)); \
+ __ Inst(IceType_i##Size, dwordAddress(T0), Encoded_GPR_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(Mask##Size &static_cast<uint32_t>( \
+ static_cast<Type##Size##_t>((Value0)&Mask##Size) \
+ Op static_cast<Type##Size##_t>((Value1)&Mask##Size)), \
+ Mask##Size &test.contentsOfDword(T0)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplAddrImm(Inst, Value0, Imm, Type, Size, Op) /* [T0] <- [T0] Op Imm, with Imm pre-masked to the operand size. */ \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", Addr, " #Value0 ", Imm, " #Imm ", " #Type #Size \
+ "_t, " #Op ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = Value0; \
+ \
+ __ Inst(IceType_i##Size, dwordAddress(T0), Immediate((Imm)&Mask##Size)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(Mask##Size &static_cast<uint32_t>( \
+ static_cast<Type##Size##_t>((Value0)&Mask##Size) \
+ Op static_cast<Type##Size##_t>((Imm)&Mask##Size)), \
+ Mask##Size &test.contentsOfDword(T0)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplOp(Inst, Dst, Value0, Src, Value1, Type, Size, Op) /* Runs one instruction/value pair through all five operand forms. */ \
+ do { \
+ TestImplRegReg(Inst, Dst, Value0, Src, Value1, Type, Size, Op); \
+ TestImplRegAddr(Inst, Dst, Value0, Value1, Type, Size, Op); \
+ TestImplRegImm(Inst, Dst, Value0, Value1, Type, Size, Op); \
+ TestImplAddrReg(Inst, Value0, Src, Value1, Type, Size, Op); \
+ TestImplAddrImm(Inst, Value0, Value1, Type, Size, Op); \
+ } while (0)
+
+#define TestImplValues(Dst, Value0, Src, Value1, Size) /* Pairs each assembler method with its C++ operator, once with int and once with uint expectations. */ \
+ do { \
+ TestImplOp(And, Dst, Value0, Src, Value1, int, Size, &); \
+ TestImplOp(And, Dst, Value0, Src, Value1, uint, Size, &); \
+ TestImplOp(Or, Dst, Value0, Src, Value1, int, Size, | ); \
+ TestImplOp(Or, Dst, Value0, Src, Value1, uint, Size, | ); \
+ TestImplOp(Xor, Dst, Value0, Src, Value1, int, Size, ^); \
+ TestImplOp(Xor, Dst, Value0, Src, Value1, uint, Size, ^); \
+ TestImplOp(add, Dst, Value0, Src, Value1, int, Size, +); \
+ TestImplOp(add, Dst, Value0, Src, Value1, uint, Size, +); \
+ TestImplOp(sub, Dst, Value0, Src, Value1, int, Size, -); \
+ TestImplOp(sub, Dst, Value0, Src, Value1, uint, Size, -); \
+ } while (0)
+
+#define TestImplSize(Dst, Src, Size) /* Four value pairs per operand size. */ \
+ do { \
+ TestImplValues(Dst, 0xF0F12101, Src, 0x00000000, Size); \
+ TestImplValues(Dst, 0xF0000000, Src, 0xF0000000, Size); \
+ TestImplValues(Dst, 0x0F00000F, Src, 0xF0000070, Size); \
+ TestImplValues(Dst, 0x0F00F00F, Src, 0xF000F070, Size); \
+ } while (0)
+
+#define TestImpl(Dst, Src) /* Runs the value pairs for 8-, 16- and 32-bit operands. */ \
+ do { \
+ TestImplSize(Dst, Src, 8); \
+ TestImplSize(Dst, Src, 16); \
+ TestImplSize(Dst, Src, 32); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplValues
+#undef TestImplOp
+#undef TestImplAddrImm
+#undef TestImplAddrReg
+#undef TestImplRegImm
+#undef TestImplRegAddr
+#undef TestImplRegReg
+}
+
+TEST_F(AssemblerX8664Test, Arith_BorrowNCarry) { // Checks add+adc and sub+sbb pairs: each value is split into two Size-bit halves.
+ const uint32_t Mask8 = 0x000000FF;
+ const uint32_t Mask16 = 0x0000FFFF;
+ const uint32_t Mask32 = 0xFFFFFFFF;
+ // ResultMask<N> keeps 2*N bits: the combined width of a (low, high) pair of N-bit halves.
+ const uint64_t ResultMask8 = 0x000000000000FFFFull;
+ const uint64_t ResultMask16 = 0x00000000FFFFFFFFull;
+ const uint64_t ResultMask32 = 0xFFFFFFFFFFFFFFFFull;
+ // Reg/reg form: Dst0/Src0 get the low halves, Dst1/Src1 the high halves; Inst0 runs on the low pair, then Inst1 on the high pair.
+#define TestImplRegReg(Inst0, Inst1, Dst0, Dst1, Value0, Src0, Src1, Value1, \
+ Op, Size) \
+ do { \
+ static_assert(Size == 8 || Size == 16 || Size == 32, \
+ "Invalid size " #Size); \
+ static constexpr char TestString[] = \
+ "(" #Inst0 ", " #Inst1 ", " #Dst0 ", " #Dst1 ", " #Value0 ", " #Src0 \
+ ", " #Src1 ", " #Value1 ", " #Op ", " #Size ")"; \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst0(), \
+ Immediate(uint64_t(Value0) & Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst1(), \
+ Immediate((uint64_t(Value0) >> Size) & Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src0(), \
+ Immediate(uint64_t(Value1) & Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src1(), \
+ Immediate((uint64_t(Value1) >> Size) & Mask##Size)); \
+ __ Inst0(IceType_i##Size, Encoded_GPR_##Dst0(), Encoded_GPR_##Src0()); \
+ __ Inst1(IceType_i##Size, Encoded_GPR_##Dst1(), Encoded_GPR_##Src1()); \
+ /* Assemble and run the sequence emitted above. */ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ /* Expected halves: apply Op to the two ResultMask-ed operands, then split the result. */ \
+ static constexpr uint64_t Result = \
+ (uint64_t(Value0) & ResultMask##Size)Op(uint64_t(Value1) & \
+ ResultMask##Size); \
+ static constexpr uint32_t Expected0 = Result & Mask##Size; \
+ static constexpr uint32_t Expected1 = (Result >> Size) & Mask##Size; \
+ ASSERT_EQ(Expected0, test.Dst0()) << TestString << ": 0"; \
+ ASSERT_EQ(Expected1, test.Dst1()) << TestString << ": 1"; \
+ reset(); \
+ } while (0)
+ // Reg/addr form: the halves of Value1 are stored in two allocated dwords (T0 low, T1 high) and used as memory sources.
+#define TestImplRegAddr(Inst0, Inst1, Dst0, Dst1, Value0, Value1, Op, Size) \
+ do { \
+ static_assert(Size == 8 || Size == 16 || Size == 32, \
+ "Invalid size " #Size); \
+ static constexpr char TestString[] = \
+ "(" #Inst0 ", " #Inst1 ", " #Dst0 ", " #Dst1 ", " #Value0 \
+ ", Addr, " #Value1 ", " #Op ", " #Size ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = uint64_t(Value1) & Mask##Size; \
+ const uint32_t T1 = allocateDword(); \
+ const uint32_t V1 = (uint64_t(Value1) >> Size) & Mask##Size; \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst0(), \
+ Immediate(uint64_t(Value0) & Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst1(), \
+ Immediate((uint64_t(Value0) >> Size) & Mask##Size)); \
+ __ Inst0(IceType_i##Size, Encoded_GPR_##Dst0(), dwordAddress(T0)); \
+ __ Inst1(IceType_i##Size, Encoded_GPR_##Dst1(), dwordAddress(T1)); \
+ /* The memory operands are filled in after assembling and before running. */ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.setDwordTo(T1, V1); \
+ test.run(); \
+ /* Same expectation as the reg/reg form. */ \
+ static constexpr uint64_t Result = \
+ (uint64_t(Value0) & ResultMask##Size)Op(uint64_t(Value1) & \
+ ResultMask##Size); \
+ static constexpr uint32_t Expected0 = Result & Mask##Size; \
+ static constexpr uint32_t Expected1 = (Result >> Size) & Mask##Size; \
+ ASSERT_EQ(Expected0, test.Dst0()) << TestString << ": 0"; \
+ ASSERT_EQ(Expected1, test.Dst1()) << TestString << ": 1"; \
+ reset(); \
+ } while (0)
+ // Reg/imm form: the halves of Imm are passed as immediates to Inst0 (low) and Inst1 (high).
+#define TestImplRegImm(Inst0, Inst1, Dst0, Dst1, Value0, Imm, Op, Size) \
+ do { \
+ static_assert(Size == 8 || Size == 16 || Size == 32, \
+ "Invalid size " #Size); \
+ static constexpr char TestString[] = \
+ "(" #Inst0 ", " #Inst1 ", " #Dst0 ", " #Dst1 ", " #Value0 \
+ ", Imm(" #Imm "), " #Op ", " #Size ")"; \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst0(), \
+ Immediate(uint64_t(Value0) & Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst1(), \
+ Immediate((uint64_t(Value0) >> Size) & Mask##Size)); \
+ __ Inst0(IceType_i##Size, Encoded_GPR_##Dst0(), \
+ Immediate(uint64_t(Imm) & Mask##Size)); \
+ __ Inst1(IceType_i##Size, Encoded_GPR_##Dst1(), \
+ Immediate((uint64_t(Imm) >> Size) & Mask##Size)); \
+ /* Assemble and run the sequence emitted above. */ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ /* Expected halves: apply Op to the two ResultMask-ed operands, then split the result. */ \
+ static constexpr uint64_t Result = \
+ (uint64_t(Value0) & ResultMask##Size)Op(uint64_t(Imm) & \
+ ResultMask##Size); \
+ static constexpr uint32_t Expected0 = Result & Mask##Size; \
+ static constexpr uint32_t Expected1 = (Result >> Size) & Mask##Size; \
+ ASSERT_EQ(Expected0, test.Dst0()) << TestString << ": 0"; \
+ ASSERT_EQ(Expected1, test.Dst1()) << TestString << ": 1"; \
+ reset(); \
+ } while (0)
+ // Addr/reg form: the destination halves live in dwords T0/T1 (initialised from Value0); results are read back from memory.
+#define TestImplAddrReg(Inst0, Inst1, Value0, Src0, Src1, Value1, Op, Size) \
+ do { \
+ static_assert(Size == 8 || Size == 16 || Size == 32, \
+ "Invalid size " #Size); \
+ static constexpr char TestString[] = \
+ "(" #Inst0 ", " #Inst1 ", Addr, " #Value0 ", " #Src0 ", " #Src1 \
+ ", " #Value1 ", " #Op ", " #Size ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = uint64_t(Value0) & Mask##Size; \
+ const uint32_t T1 = allocateDword(); \
+ const uint32_t V1 = (uint64_t(Value0) >> Size) & Mask##Size; \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src0(), \
+ Immediate(uint64_t(Value1) & Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src1(), \
+ Immediate((uint64_t(Value1) >> Size) & Mask##Size)); \
+ __ Inst0(IceType_i##Size, dwordAddress(T0), Encoded_GPR_##Src0()); \
+ __ Inst1(IceType_i##Size, dwordAddress(T1), Encoded_GPR_##Src1()); \
+ /* The memory destinations are initialised after assembling and before running. */ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.setDwordTo(T1, V1); \
+ test.run(); \
+ /* Expected halves are compared against the dwords instead of registers. */ \
+ static constexpr uint64_t Result = \
+ (uint64_t(Value0) & ResultMask##Size)Op(uint64_t(Value1) & \
+ ResultMask##Size); \
+ static constexpr uint32_t Expected0 = Result & Mask##Size; \
+ static constexpr uint32_t Expected1 = (Result >> Size) & Mask##Size; \
+ ASSERT_EQ(Expected0, test.contentsOfDword(T0)) << TestString << ": 0"; \
+ ASSERT_EQ(Expected1, test.contentsOfDword(T1)) << TestString << ": 1"; \
+ reset(); \
+ } while (0)
+ // Addr/imm form: memory destinations T0/T1 (initialised from Value0) combined with the immediate halves of Imm.
+#define TestImplAddrImm(Inst0, Inst1, Value0, Imm, Op, Size) \
+ do { \
+ static_assert(Size == 8 || Size == 16 || Size == 32, \
+ "Invalid size " #Size); \
+ static constexpr char TestString[] = \
+ "(" #Inst0 ", " #Inst1 ", Addr, " #Value0 ", Imm(" #Imm "), " #Op \
+ ", " #Size ")"; \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = uint64_t(Value0) & Mask##Size; \
+ const uint32_t T1 = allocateDword(); \
+ const uint32_t V1 = (uint64_t(Value0) >> Size) & Mask##Size; \
+ __ Inst0(IceType_i##Size, dwordAddress(T0), \
+ Immediate(uint64_t(Imm) & Mask##Size)); \
+ __ Inst1(IceType_i##Size, dwordAddress(T1), \
+ Immediate((uint64_t(Imm) >> Size) & Mask##Size)); \
+ /* The memory destinations are initialised after assembling and before running. */ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.setDwordTo(T1, V1); \
+ test.run(); \
+ /* Expected halves are compared against the dwords instead of registers. */ \
+ static constexpr uint64_t Result = \
+ (uint64_t(Value0) & ResultMask##Size)Op(uint64_t(Imm) & \
+ ResultMask##Size); \
+ static constexpr uint32_t Expected0 = Result & Mask##Size; \
+ static constexpr uint32_t Expected1 = (Result >> Size) & Mask##Size; \
+ ASSERT_EQ(Expected0, test.contentsOfDword(T0)) << TestString << ": 0"; \
+ ASSERT_EQ(Expected1, test.contentsOfDword(T1)) << TestString << ": 1"; \
+ reset(); \
+ } while (0)
+ // Runs one (Inst0, Inst1) pair through all five operand forms defined above.
+#define TestImplOp(Inst0, Inst1, Dst0, Dst1, Value0, Src0, Src1, Value1, Op, \
+ Size) \
+ do { \
+ TestImplRegReg(Inst0, Inst1, Dst0, Dst1, Value0, Src0, Src1, Value1, Op, \
+ Size); \
+ TestImplRegAddr(Inst0, Inst1, Dst0, Dst1, Value0, Value1, Op, Size); \
+ TestImplRegImm(Inst0, Inst1, Dst0, Dst1, Value0, Value1, Op, Size); \
+ TestImplAddrReg(Inst0, Inst1, Value0, Src0, Src1, Value1, Op, Size); \
+ TestImplAddrImm(Inst0, Inst1, Value0, Value1, Op, Size); \
+ } while (0)
+ // Instruction pairs under test: add followed by adc (modelled with +), sub followed by sbb (modelled with -).
+#define TestImplValues(Dst0, Dst1, Value0, Src0, Src1, Value1, Size) \
+ do { \
+ TestImplOp(add, adc, Dst0, Dst1, Value0, Src0, Src1, Value1, +, Size); \
+ TestImplOp(sub, sbb, Dst0, Dst1, Value0, Src0, Src1, Value1, -, Size); \
+ } while (0)
+ // A single operand pair is used for every size; the macros above mask it down to 2*Size bits.
+#define TestImplSize(Dst0, Dst1, Src0, Src1, Size) \
+ do { \
+ TestImplValues(Dst0, Dst1, 0xFFFFFFFFFFFFFF00ull, Src0, Src1, \
+ 0xFFFFFFFF0000017Full, Size); \
+ } while (0)
+ // Instantiates the test for 8-, 16- and 32-bit halves with the given register quadruple.
+#define TestImpl(Dst0, Dst1, Src0, Src1) \
+ do { \
+ TestImplSize(Dst0, Dst1, Src0, Src1, 8); \
+ TestImplSize(Dst0, Dst1, Src0, Src1, 16); \
+ TestImplSize(Dst0, Dst1, Src0, Src1, 32); \
+ } while (0)
+ // Register quadruples rotate through r1..r15; r9 never appears in this list.
+ TestImpl(r1, r2, r3, r5);
+ TestImpl(r2, r3, r4, r6);
+ TestImpl(r3, r4, r5, r7);
+ TestImpl(r4, r5, r6, r8);
+ TestImpl(r5, r6, r7, r10);
+ TestImpl(r6, r7, r8, r11);
+ TestImpl(r7, r8, r10, r12);
+ TestImpl(r8, r10, r11, r13);
+ TestImpl(r10, r11, r12, r14);
+ TestImpl(r11, r12, r13, r15);
+ TestImpl(r12, r13, r14, r1);
+ TestImpl(r13, r14, r15, r2);
+ TestImpl(r14, r15, r1, r3);
+ TestImpl(r15, r1, r2, r4);
+ // Helper macros are local to this test; undefine them so later tests can reuse the names.
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplValues
+#undef TestImplOp
+#undef TestImplAddrImm
+#undef TestImplAddrReg
+#undef TestImplRegImm
+#undef TestImplRegAddr
+#undef TestImplRegReg
+}
+
+TEST_F(AssemblerX8664LowLevelTest, Cbw_Cwd_Cdq) {
+ // Each operand-less mnemonic must assemble to exactly the listed byte
+ // sequence (cbw and cwd are expected with a leading 0x66, cdq without).
+#define CheckEncoding(Mnemonic, Length, ...) \
+ do { \
+ /* Emit the instruction, then compare the size and contents of the code. */ \
+ __ Mnemonic(); \
+ ASSERT_EQ(Length, codeBytesSize()) << #Mnemonic; \
+ ASSERT_TRUE(verifyBytes<Length>(codeBytes(), __VA_ARGS__)); \
+ reset(); \
+ } while (0)
+
+ CheckEncoding(cbw, 2u, 0x66, 0x98);
+ CheckEncoding(cwd, 2u, 0x66, 0x99);
+ CheckEncoding(cdq, 1u, 0x99);
+
+#undef CheckEncoding
+}
+
+TEST_F(AssemblerX8664Test, SingleOperandMul) {
+ // Exercises the one-operand mul/imul forms for 8-, 16- and 32-bit operands.
+ // Value0 is loaded into eax, the instruction is given a register or a memory
+ // operand holding Value1, and the product is read back from eax (low Size
+ // bits) and edx (next Size bits).
+ static constexpr uint32_t Mask8 = 0x000000FF;
+ static constexpr uint32_t Mask16 = 0x0000FFFF;
+ static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+ // Register-operand form. Src must not be eax because eax carries Value0.
+#define TestImplReg(Inst, Value0, Src, Value1, Type, Size) \
+ do { \
+ static_assert(Encoded_GPR_eax() != Encoded_GPR_##Src(), \
+ "eax can not be src1."); \
+ \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Value0 ", " #Src ", " #Value1 ", " #Type ", " #Size \
+ ")"; \
+ /* Operands are truncated to Size bits, then widened to 64 bits with the */ \
+ /* signedness selected by Type, so the product below is exact. */ \
+ static constexpr Type##64_t OperandEax = \
+ static_cast<Type##Size##_t>((Value0)&Mask##Size); \
+ static constexpr Type##64_t OperandOther = \
+ static_cast<Type##Size##_t>((Value1)&Mask##Size); \
+ static constexpr uint32_t ExpectedEax = \
+ Mask##Size & (OperandEax * OperandOther); \
+ static constexpr uint32_t ExpectedEdx = \
+ Mask##Size & ((OperandEax * OperandOther) >> Size); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_eax(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), \
+ Immediate((Value1)&Mask##Size)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Src()); \
+ \
+ if (Size == 8) { \
+ /* mov %ah, %dl */ \
+ /* Done as: copy ax to dx, shift edx right by 8, keep only al in ax, */ \
+ /* so the checks below can read the two bytes from eax and edx. */ \
+ __ mov(IceType_i16, Encoded_GPR_dx(), Encoded_GPR_ax()); \
+ __ shr(IceType_i32, Encoded_GPR_edx(), Immediate(8)); \
+ __ And(IceType_i16, Encoded_GPR_ax(), Immediate(0x00FF)); \
+ } \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(ExpectedEax, test.eax()) << TestString; \
+ ASSERT_EQ(ExpectedEdx, test.edx()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Memory-operand form: Value1 is stored in an allocated dword.
+#define TestImplAddr(Inst, Value0, Value1, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Value0 ", Addr, " #Value1 ", " #Type ", " #Size ")"; \
+ /* Deliberately not static: a block-scope static would be initialised */ \
+ /* only once per process, so a repeated execution of this test body */ \
+ /* would skip allocateDword() and reuse a stale slot. */ \
+ const uint32_t T0 = allocateDword(); \
+ static constexpr uint32_t V0 = Value1; \
+ static constexpr Type##64_t OperandEax = \
+ static_cast<Type##Size##_t>((Value0)&Mask##Size); \
+ static constexpr Type##64_t OperandOther = \
+ static_cast<Type##Size##_t>((Value1)&Mask##Size); \
+ static constexpr uint32_t ExpectedEax = \
+ Mask##Size & (OperandEax * OperandOther); \
+ static constexpr uint32_t ExpectedEdx = \
+ Mask##Size & ((OperandEax * OperandOther) >> Size); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_eax(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ Inst(IceType_i##Size, dwordAddress(T0)); \
+ \
+ if (Size == 8) { \
+ /* mov %ah, %dl */ \
+ __ mov(IceType_i16, Encoded_GPR_dx(), Encoded_GPR_ax()); \
+ __ shr(IceType_i32, Encoded_GPR_edx(), Immediate(8)); \
+ __ And(IceType_i16, Encoded_GPR_ax(), Immediate(0x00FF)); \
+ } \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(ExpectedEax, test.eax()) << TestString; \
+ ASSERT_EQ(ExpectedEdx, test.edx()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Runs one instruction through both the register and the memory form.
+#define TestImplOp(Inst, Value0, Src, Value1, Type, Size) \
+ do { \
+ TestImplReg(Inst, Value0, Src, Value1, Type, Size); \
+ TestImplAddr(Inst, Value0, Value1, Type, Size); \
+ } while (0)
+
+ // mul is modelled with unsigned arithmetic, imul with signed arithmetic.
+#define TestImplValue(Value0, Src, Value1, Size) \
+ do { \
+ TestImplOp(mul, Value0, Src, Value1, uint, Size); \
+ TestImplOp(imul, Value0, Src, Value1, int, Size); \
+ } while (0)
+
+ // Operand pairs covering every sign combination.
+#define TestImplSize(Src, Size) \
+ do { \
+ TestImplValue(10, Src, 1, Size); \
+ TestImplValue(10, Src, -1, Size); \
+ TestImplValue(-10, Src, 37, Size); \
+ TestImplValue(-10, Src, -15, Size); \
+ } while (0)
+
+#define TestImpl(Src) \
+ do { \
+ TestImplSize(Src, 8); \
+ TestImplSize(Src, 16); \
+ TestImplSize(Src, 32); \
+ } while (0)
+
+ // r1 is not in the list; presumably it names eax, which the static_assert in
+ // TestImplReg rejects as a source -- confirm against the register aliases.
+ TestImpl(r2);
+ TestImpl(r3);
+ TestImpl(r4);
+ TestImpl(r5);
+ TestImpl(r6);
+ TestImpl(r7);
+ TestImpl(r8);
+ TestImpl(r10);
+ TestImpl(r11);
+ TestImpl(r12);
+ TestImpl(r13);
+ TestImpl(r14);
+ TestImpl(r15);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplValue
+#undef TestImplOp
+#undef TestImplAddr
+#undef TestImplReg
+}
+
+TEST_F(AssemblerX8664Test, TwoOperandImul) {
+ static constexpr uint32_t Mask16 = 0x0000FFFF;
+ static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+#define TestImplRegReg(Dst, Value0, Src, Value1, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ")"; \
+ static constexpr int64_t Operand0 = \
+ static_cast<int##Size##_t>((Value0)&Mask##Size); \
+ static constexpr int64_t Operand1 = \
+ static_cast<int##Size##_t>((Value1)&Mask##Size); \
+ static constexpr uint32_t Expected = Mask##Size & (Operand0 * Operand1); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), \
+ Immediate((Value1)&Mask##Size)); \
+ __ imul(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_##Src()); \
+ \
+ if (Size == 8) { \
+ /* mov %ah, %dl */ \
+ __ mov(IceType_i16, Encoded_GPR_dx(), Encoded_GPR_ax()); \
+ __ shr(IceType_i32, Encoded_GPR_edx(), Immediate(8)); \
+ __ And(IceType_i16, Encoded_GPR_ax(), Immediate(0x00FF)); \
+ } \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplRegImm(Dst, Value0, Imm, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Imm(" #Imm "), " #Size ")"; \
+ static constexpr int64_t Operand0 = \
+ static_cast<int##Size##_t>((Value0)&Mask##Size); \
+ static constexpr int64_t Operand1 = \
+ static_cast<int##Size##_t>((Imm)&Mask##Size); \
+ static constexpr uint32_t Expected = Mask##Size & (Operand0 * Operand1); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ imul(IceType_i##Size, Encoded_GPR_##Dst(), Immediate(Imm)); \
+ \
+ if (Size == 8) { \
+ /* mov %ah, %dl */ \
+ __ mov(IceType_i16, Encoded_GPR_dx(), Encoded_GPR_ax()); \
+ __ shr(IceType_i32, Encoded_GPR_edx(), Immediate(8)); \
+ __ And(IceType_i16, Encoded_GPR_ax(), Immediate(0x00FF)); \
+ } \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplRegAddr(Dst, Value0, Value1, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Dst ", " #Value0 ", Addr," #Value1 ", " #Size ")"; \
+ static constexpr int64_t Operand0 = \
+ static_cast<int##Size##_t>((Value0)&Mask##Size); \
+ static constexpr int64_t Operand1 = \
+ static_cast<int##Size##_t>((Value1)&Mask##Size); \
+ static constexpr uint32_t Expected = Mask##Size & (Operand0 * Operand1); \
+ const uint32_t T0 = allocateDword(); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ imul(IceType_i##Size, Encoded_GPR_##Dst(), dwordAddress(T0)); \
+ \
+ if (Size == 8) { \
+ /* mov %ah, %dl */ \
+ __ mov(IceType_i16, Encoded_GPR_dx(), Encoded_GPR_ax()); \
+ __ shr(IceType_i32, Encoded_GPR_edx(), Immediate(8)); \
+ __ And(IceType_i16, Encoded_GPR_ax(), Immediate(0x00FF)); \
+ } \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, static_cast<uint32_t>(Operand1)); \
+ test.run(); \
+ \
+ ASSERT_EQ(Expected, test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+#define TestImplValue(Dst, Value0, Src, Value1, Size) \
+ do { \
+ TestImplRegReg(Dst, Value0, Src, Value1, Size); \
+ TestImplRegImm(Dst, Value0, Value1, Size); \
+ TestImplRegAddr(Dst, Value0, Value1, Size); \
+ } while (0)
+
+#define TestImplSize(Dst, Src, Size) \
+ do { \
+ TestImplValue(Dst, 1, Src, 1, Size); \
+ TestImplValue(Dst, -10, Src, 0x4050AA20, Size); \
+ TestImplValue(Dst, -2, Src, -55, Size); \
+ } while (0)
+
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplSize(Dst, Src, 16); \
+ TestImplSize(Dst, Src, 32); \
+ } while (0)
+
+ TestImpl(r1, r2);
+ TestImpl(r2, r3);
+ TestImpl(r3, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplValue
+#undef TestImplRegAddr
+#undef TestImplRegImm
+#undef TestImplRegReg
+}
+
+TEST_F(AssemblerX8664Test, Div) { // Checks div/idiv with a register or memory divisor; quotient is read from eax, remainder from edx.
+ static constexpr uint32_t Mask8 = 0x000000FF;
+ static constexpr uint32_t Mask16 = 0x0000FFFF;
+ static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+ // Operand0Mask<N> bounds the dividend value: 8 bits for N=8, 32 bits for N=16, 64 bits for N=32.
+ static constexpr uint64_t Operand0Mask8 = 0x00000000000000FFull;
+ static constexpr uint64_t Operand0Mask16 = 0x00000000FFFFFFFFull;
+ static constexpr uint64_t Operand0Mask32 = 0xFFFFFFFFFFFFFFFFull;
+ // Operand0Type_<Type><N> is the integer type, twice as wide as the divisor, that holds the dividend.
+ using Operand0Type_int8 = int16_t;
+ using Operand0Type_uint8 = uint16_t;
+ using Operand0Type_int16 = int32_t;
+ using Operand0Type_uint16 = uint32_t;
+ using Operand0Type_int32 = int64_t;
+ using Operand0Type_uint32 = uint64_t;
+ // Register-divisor form. The dividend is split into Lo/Hi halves; Src may be neither eax nor edx.
+#define TestImplReg(Inst, Value0, Src, Value1, Type, Size) \
+ do { \
+ static_assert(Encoded_GPR_eax() != Encoded_GPR_##Src(), \
+ "eax can not be src1."); \
+ static_assert(Encoded_GPR_edx() != Encoded_GPR_##Src(), \
+ "edx can not be src1."); \
+ /* Compile-time operands: Operand0 is the dividend, Operand1 the Size-bit divisor. */ \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Value0 ", " #Src ", " #Value1 ", " #Type ", " #Size \
+ ")"; \
+ static constexpr Operand0Type_##Type##Size Operand0 = \
+ static_cast<Type##64_t>(Value0) & Operand0Mask##Size; \
+ static constexpr Type##Size##_t Operand0Lo = Operand0 & Mask##Size; \
+ static constexpr Type##Size##_t Operand0Hi = \
+ (Operand0 >> Size) & Mask##Size; \
+ static constexpr Type##Size##_t Operand1 = \
+ static_cast<Type##Size##_t>(Value1) & Mask##Size; \
+ if (Size == 8) { \
+ /* mov Operand0Hi|Operand0Lo, %ah|%al */ \
+ __ mov( \
+ IceType_i16, Encoded_GPR_eax(), \
+ Immediate((static_cast<uint16_t>(Operand0Hi) << 8 | Operand0Lo))); \
+ } else { \
+ __ mov(IceType_i##Size, Encoded_GPR_eax(), Immediate(Operand0Lo)); \
+ __ mov(IceType_i##Size, Encoded_GPR_edx(), Immediate(Operand0Hi)); \
+ } \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), Immediate(Operand1)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Src()); \
+ if (Size == 8) { \
+ /* mov %ah, %dl */ \
+ __ mov(IceType_i16, Encoded_GPR_dx(), Encoded_GPR_ax()); \
+ __ shr(IceType_i32, Encoded_GPR_edx(), Immediate(8)); \
+ __ And(IceType_i16, Encoded_GPR_eax(), Immediate(0x00FF)); \
+ if (Encoded_GPR_##Src() == Encoded_GPR_esi()) { /* NOTE(review): extra edx masking only for esi; the reason is not visible here -- confirm. */ \
+ __ And(IceType_i16, Encoded_GPR_edx(), Immediate(0x00FF)); \
+ } \
+ } \
+ /* Assemble and run the sequence emitted above. */ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ /* Expected quotient and remainder are computed with the host's / and % on the typed operands. */ \
+ static constexpr uint32_t Quocient = (Operand0 / Operand1) & Mask##Size; \
+ static constexpr uint32_t Reminder = (Operand0 % Operand1) & Mask##Size; \
+ ASSERT_EQ(Quocient, test.eax()) << TestString; \
+ ASSERT_EQ(Reminder, test.edx()) << TestString; \
+ reset(); \
+ } while (0)
+ // Memory-divisor form: the divisor V0 is stored in an allocated dword T0. The esi-specific masking above is not repeated here.
+#define TestImplAddr(Inst, Value0, Value1, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Value0 ", Addr, " #Value1 ", " #Type ", " #Size ")"; \
+ static constexpr Operand0Type_##Type##Size Operand0 = \
+ static_cast<Type##64_t>(Value0) & Operand0Mask##Size; \
+ static constexpr Type##Size##_t Operand0Lo = Operand0 & Mask##Size; \
+ static constexpr Type##Size##_t Operand0Hi = \
+ (Operand0 >> Size) & Mask##Size; \
+ const uint32_t T0 = allocateDword(); \
+ static constexpr Type##Size##_t V0 = \
+ static_cast<Type##Size##_t>(Value1) & Mask##Size; \
+ if (Size == 8) { \
+ /* mov Operand0Hi|Operand0Lo, %ah|%al */ \
+ __ mov( \
+ IceType_i16, Encoded_GPR_eax(), \
+ Immediate((static_cast<uint16_t>(Operand0Hi) << 8 | Operand0Lo))); \
+ } else { \
+ __ mov(IceType_i##Size, Encoded_GPR_eax(), Immediate(Operand0Lo)); \
+ __ mov(IceType_i##Size, Encoded_GPR_edx(), Immediate(Operand0Hi)); \
+ } \
+ __ Inst(IceType_i##Size, dwordAddress(T0)); \
+ if (Size == 8) { \
+ /* mov %ah, %dl */ \
+ __ mov(IceType_i16, Encoded_GPR_dx(), Encoded_GPR_ax()); \
+ __ shr(IceType_i32, Encoded_GPR_edx(), Immediate(8)); \
+ __ And(IceType_i16, Encoded_GPR_eax(), Immediate(0x00FF)); \
+ } \
+ /* The divisor is written to memory after assembling and before running. */ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, static_cast<uint32_t>(V0)); \
+ test.run(); \
+ /* Same expectation as the register form, with V0 as the divisor. */ \
+ static constexpr uint32_t Quocient = (Operand0 / V0) & Mask##Size; \
+ static constexpr uint32_t Reminder = (Operand0 % V0) & Mask##Size; \
+ ASSERT_EQ(Quocient, test.eax()) << TestString; \
+ ASSERT_EQ(Reminder, test.edx()) << TestString; \
+ reset(); \
+ } while (0)
+ // Runs one instruction through both the register and the memory form.
+#define TestImplOp(Inst, Value0, Src, Value1, Type, Size) \
+ do { \
+ TestImplReg(Inst, Value0, Src, Value1, Type, Size); \
+ TestImplAddr(Inst, Value0, Value1, Type, Size); \
+ } while (0)
+ // div is modelled with unsigned types, idiv with signed types.
+#define TestImplValue(Value0, Src, Value1, Size) \
+ do { \
+ TestImplOp(div, Value0, Src, Value1, uint, Size); \
+ TestImplOp(idiv, Value0, Src, Value1, int, Size); \
+ } while (0)
+ // Only two operand pairs are covered: 10 / 1 and 10 / -1.
+#define TestImplSize(Src, Size) \
+ do { \
+ TestImplValue(10, Src, 1, Size); \
+ TestImplValue(10, Src, -1, Size); \
+ } while (0)
+ // Instantiates the test for 8-, 16- and 32-bit divisors.
+#define TestImpl(Src) \
+ do { \
+ TestImplSize(Src, 8); \
+ TestImplSize(Src, 16); \
+ TestImplSize(Src, 32); \
+ } while (0)
+ // r1 and r4 are not in the list; presumably they name eax and edx, which the static_asserts reject -- confirm.
+ TestImpl(r2);
+ TestImpl(r3);
+ TestImpl(r5);
+ TestImpl(r6);
+ TestImpl(r7);
+ TestImpl(r8);
+ TestImpl(r10);
+ TestImpl(r11);
+ TestImpl(r12);
+ TestImpl(r13);
+ TestImpl(r14);
+ TestImpl(r15);
+ // Helper macros are local to this test.
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplValue
+#undef TestImplOp
+#undef TestImplAddr
+#undef TestImplReg
+}
+
+TEST_F(AssemblerX8664Test, Incl_Decl_Addr) {
+ // Applies a single incl/decl to a dword in memory and checks that the stored
+ // value moved by exactly one in the direction implied by the mnemonic.
+#define CheckStep(Mnemonic, Initial) \
+ do { \
+ /* The mnemonic text selects the expected delta: +1 for incl, else -1. */ \
+ const int Delta = std::strstr(#Mnemonic, "incl") != nullptr ? 1 : -1; \
+ const uint32_t Slot = allocateDword(); \
+ const uint32_t Before = Initial; \
+ \
+ __ Mnemonic(dwordAddress(Slot)); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(Slot, Before); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Initial + Delta), \
+ test.contentsOfDword(Slot)); \
+ reset(); \
+ } while (0)
+
+ CheckStep(incl, 230);
+
+ CheckStep(decl, 30);
+
+#undef CheckStep
+}
+
+TEST_F(AssemblerX8664Test, Shifts) {
+ // Exercises rol/shl/shr/sar (count given as an immediate or in cl, on a
+ // register or memory destination) and the double-precision shifts shld/shrd.
+ // Expected values are computed on the host with the C++ shift operators.
+ static constexpr uint32_t Mask8 = 0x000000FF;
+ static constexpr uint32_t Mask16 = 0x0000FFFF;
+ static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+ // Register destination, immediate count. For rol (detected from the
+ // instruction name) the bits shifted out are OR-ed back in at the bottom.
+#define TestImplRegImm(Inst, Dst, Value0, Imm, Op, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Value0 ", Imm(" #Imm "), " #Op ", " #Type \
+ ", " #Size ")"; \
+ const bool IsRol = std::string(#Inst).find("rol") != std::string::npos; \
+ const uint##Size##_t Expected = \
+ Mask##Size & (static_cast<Type##Size##_t>(Value0) Op(Imm) | \
+ (!IsRol ? 0 : (Value0) >> (Size - Imm))); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Imm)&Mask##Size)); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Expected), test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Double shift, register destination and source, immediate count. The
+ // expected value combines Dst shifted by Count with Src shifted the other way
+ // by (Size - Count).
+#define TestImplRegRegImm(Inst, Dst, Value0, Src, Value1, Count, Op0, Op1, \
+ Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Value0 ", " #Src ", " #Value1 \
+ ", Imm(" #Count "), " #Op0 ", " #Op1 ", " #Type ", " #Size ")"; \
+ const uint##Size##_t Expected = \
+ Mask##Size & (static_cast<Type##Size##_t>(Value0) Op0(Count) | \
+ (static_cast<Type##64_t>(Value1) Op1(Size - Count))); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), \
+ Immediate((Value1)&Mask##Size)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_##Src(), \
+ Immediate(Count)); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Expected), test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Register destination, count loaded into ecx as an 8-bit value.
+#define TestImplRegCl(Inst, Dst, Value0, Count, Op, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Value0 ", " #Count ", " #Op ", " #Type \
+ ", " #Size ")"; \
+ const bool IsRol = std::string(#Inst).find("rol") != std::string::npos; \
+ const uint##Size##_t Expected = \
+ Mask##Size & (static_cast<Type##Size##_t>(Value0) Op(Count) | \
+ (!IsRol ? 0 : Value0 >> (Size - Count))); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ mov(IceType_i8, Encoded_GPR_ecx(), Immediate((Count)&Mask##Size)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_ecx()); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Expected), test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Double shift with the count loaded into ecx; the instruction itself is
+ // emitted with only Dst and Src as explicit operands.
+#define TestImplRegRegCl(Inst, Dst, Value0, Src, Value1, Count, Op0, Op1, \
+ Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", " #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Count \
+ ", " #Op0 ", " #Op1 ", " #Type ", " #Size ")"; \
+ const uint##Size##_t Expected = \
+ Mask##Size & (static_cast<Type##Size##_t>(Value0) Op0(Count) | \
+ (static_cast<Type##64_t>(Value1) Op1(Size - Count))); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate((Value0)&Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), \
+ Immediate((Value1)&Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_ecx(), Immediate((Count)&0x7F)); \
+ __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Expected), test.Dst()) << TestString; \
+ reset(); \
+ } while (0)
+
+ // Memory destination, count in ecx. Only the low Size bits of the dword are
+ // compared because narrower shifts leave the rest of the dword untouched.
+#define TestImplAddrCl(Inst, Value0, Count, Op, Type, Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", Addr, " #Value0 ", " #Count ", " #Op ", " #Type \
+ ", " #Size ")"; \
+ const bool IsRol = std::string(#Inst).find("rol") != std::string::npos; \
+ const uint##Size##_t Expected = \
+ Mask##Size & (static_cast<Type##Size##_t>(Value0) Op(Count) | \
+ (!IsRol ? 0 : Value0 >> (Size - Count))); \
+ const uint32_t T0 = allocateDword(); \
+ const uint32_t V0 = Value0; \
+ \
+ __ mov(IceType_i8, Encoded_GPR_ecx(), Immediate((Count)&Mask##Size)); \
+ __ Inst(IceType_i##Size, dwordAddress(T0), Encoded_GPR_ecx()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, V0); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Expected), \
+ Mask##Size &test.contentsOfDword(T0)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ // Double shift on a memory destination with the count in ecx.
+#define TestImplAddrRegCl(Inst, Value0, Src, Value1, Count, Op0, Op1, Type, \
+ Size) \
+ do { \
+ static constexpr char TestString[] = \
+ "(" #Inst ", Addr, " #Value0 ", " #Src ", " #Value1 ", " #Count \
+ ", " #Op0 ", " #Op1 ", " #Type ", " #Size ")"; \
+ const uint##Size##_t Expected = \
+ Mask##Size & (static_cast<Type##Size##_t>(Value0) Op0(Count) | \
+ (static_cast<Type##64_t>(Value1) Op1(Size - Count))); \
+ const uint32_t T0 = allocateDword(); \
+ \
+ __ mov(IceType_i##Size, Encoded_GPR_##Src(), \
+ Immediate((Value1)&Mask##Size)); \
+ __ mov(IceType_i##Size, Encoded_GPR_ecx(), Immediate((Count)&0x7F)); \
+ __ Inst(IceType_i##Size, dwordAddress(T0), Encoded_GPR_##Src()); \
+ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, static_cast<uint32_t>(Value0)); \
+ test.run(); \
+ \
+ ASSERT_EQ(static_cast<uint32_t>(Expected), test.contentsOfDword(T0)) \
+ << TestString; \
+ reset(); \
+ } while (0)
+
+ // Runs a two-operand shift through every form. Dst must not be ecx because
+ // ecx carries the count; ecx as an immediate-count destination is covered
+ // separately by the second TestImplRegImm.
+#define TestImplOp(Inst, Dst, Value0, Count, Op, Type, Size) \
+ do { \
+ static_assert(Encoded_GPR_##Dst() != Encoded_GPR_ecx(), \
+ "ecx should not be specified as Dst"); \
+ TestImplRegImm(Inst, Dst, Value0, Count, Op, Type, Size); \
+ TestImplRegImm(Inst, ecx, Value0, Count, Op, Type, Size); \
+ TestImplRegCl(Inst, Dst, Value0, Count, Op, Type, Size); \
+ TestImplAddrCl(Inst, Value0, Count, Op, Type, Size); \
+ } while (0)
+
+ // Runs a double shift through every form; neither Dst nor Src may be ecx.
+#define TestImplThreeOperandOp(Inst, Dst, Value0, Src, Value1, Count, Op0, \
+ Op1, Type, Size) \
+ do { \
+ static_assert(Encoded_GPR_##Dst() != Encoded_GPR_ecx(), \
+ "ecx should not be specified as Dst"); \
+ static_assert(Encoded_GPR_##Src() != Encoded_GPR_ecx(), \
+ "ecx should not be specified as Src"); \
+ TestImplRegRegImm(Inst, Dst, Value0, Src, Value1, Count, Op0, Op1, Type, \
+ Size); \
+ TestImplRegRegCl(Inst, Dst, Value0, Src, Value1, Count, Op0, Op1, Type, \
+ Size); \
+ TestImplAddrRegCl(Inst, Value0, Src, Value1, Count, Op0, Op1, Type, Size); \
+ } while (0)
+
+ // sar is modelled with a signed type so that >> is an arithmetic shift; the
+ // other three use unsigned types.
+#define TestImplValue(Dst, Value0, Count, Size) \
+ do { \
+ TestImplOp(rol, Dst, Value0, Count, <<, uint, Size); \
+ TestImplOp(shl, Dst, Value0, Count, <<, uint, Size); \
+ TestImplOp(shr, Dst, Value0, Count, >>, uint, Size); \
+ TestImplOp(sar, Dst, Value0, Count, >>, int, Size); \
+ } while (0)
+
+#define TestImplThreeOperandValue(Dst, Value0, Src, Value1, Count, Size) \
+ do { \
+ TestImplThreeOperandOp(shld, Dst, Value0, Src, Value1, Count, <<, >>, \
+ uint, Size); \
+ TestImplThreeOperandOp(shrd, Dst, Value0, Src, Value1, Count, >>, <<, \
+ uint, Size); \
+ } while (0)
+
+#define TestImplSize(Dst, Size) \
+ do { \
+ TestImplValue(Dst, 0x8F, 3, Size); \
+ TestImplValue(Dst, 0x8FFF, 7, Size); \
+ TestImplValue(Dst, 0x8FFFF, 7, Size); \
+ } while (0)
+
+#define TestImplThreeOperandSize(Dst, Src, Size) \
+ do { \
+ TestImplThreeOperandValue(Dst, 0xFFF3, Src, 0xA000, 8, Size); \
+ } while (0)
+
+ // The double shifts are instantiated for 16 and 32 bits only.
+#define TestImpl(Dst, Src) \
+ do { \
+ TestImplSize(Dst, 8); \
+ TestImplSize(Dst, 16); \
+ TestImplThreeOperandSize(Dst, Src, 16); \
+ TestImplSize(Dst, 32); \
+ TestImplThreeOperandSize(Dst, Src, 32); \
+ } while (0)
+
+ // r3 never appears as Dst or Src; presumably it names ecx, which the
+ // static_asserts above reject -- confirm against the register aliases.
+ TestImpl(r1, r2);
+ TestImpl(r2, r4);
+ TestImpl(r4, r5);
+ TestImpl(r5, r6);
+ TestImpl(r6, r7);
+ TestImpl(r7, r8);
+ TestImpl(r8, r10);
+ TestImpl(r10, r11);
+ TestImpl(r11, r12);
+ TestImpl(r12, r13);
+ TestImpl(r13, r14);
+ TestImpl(r14, r15);
+ TestImpl(r15, r1);
+
+ // Every helper macro defined in this test is undefined here so that none
+ // leaks into the tests that follow.
+#undef TestImpl
+#undef TestImplThreeOperandSize
+#undef TestImplSize
+#undef TestImplValue
+#undef TestImplThreeOperandValue
+#undef TestImplOp
+#undef TestImplThreeOperandOp
+#undef TestImplAddrRegCl
+#undef TestImplAddrCl
+#undef TestImplRegRegCl
+#undef TestImplRegCl
+#undef TestImplRegRegImm
+#undef TestImplRegImm
+}
+
+TEST_F(AssemblerX8664Test, Neg) { // Checks neg on a register and on a memory dword for 8-, 16- and 32-bit operands.
+ static constexpr uint32_t Mask8 = 0x000000ff;
+ static constexpr uint32_t Mask16 = 0x0000ffff;
+ static constexpr uint32_t Mask32 = 0xffffffff;
+ // Register form: negate Dst, copy it to eax, and mask eax to Size bits so the check always reads eax.
+#define TestImplReg(Dst, Size) \
+ do { \
+ static constexpr int32_t Value = 0xFF00A543; \
+ __ mov(IceType_i##Size, Encoded_GPR_##Dst(), \
+ Immediate(static_cast<int##Size##_t>(Value) & Mask##Size)); \
+ __ neg(IceType_i##Size, Encoded_GPR_##Dst()); \
+ __ mov(IceType_i##Size, Encoded_GPR_eax(), Encoded_GPR_##Dst()); \
+ __ And(IceType_i32, Encoded_GPR_eax(), Immediate(Mask##Size)); \
+ /* Assemble and run the sequence emitted above. */ \
+ AssembledTest test = assemble(); \
+ test.run(); \
+ /* Expected value: bitwise complement of the Size-bit input, plus one. */ \
+ ASSERT_EQ(1 + (~static_cast<int##Size##_t>(Value) & Mask##Size), \
+ test.eax()) \
+ << "(" #Dst ", " #Size ")"; \
+ reset(); \
+ } while (0)
+ // Memory form: the dword is pre-loaded with Value masked to Size bits and negated in place.
+#define TestImplAddr(Size) \
+ do { \
+ static constexpr int32_t Value = 0xFF00A543; \
+ const uint32_t T0 = allocateDword(); \
+ __ neg(IceType_i##Size, dwordAddress(T0)); \
+ /* The operand is written to memory after assembling and before running. */ \
+ AssembledTest test = assemble(); \
+ test.setDwordTo(T0, Value &Mask##Size); \
+ test.run(); \
+ /* Same expectation as the register form, read back from the dword. */ \
+ ASSERT_EQ(1 + (~static_cast<int##Size##_t>(Value) & Mask##Size), \
+ test.contentsOfDword(T0)) \
+ << "(Addr, " #Size ")"; \
+ reset(); \
+ } while (0)
+ // For one size: the memory form once, then the register form for each listed register (r9 is not listed).
+#define TestImpl(Size) \
+ do { \
+ TestImplAddr(Size); \
+ TestImplReg(r1, Size); \
+ TestImplReg(r2, Size); \
+ TestImplReg(r3, Size); \
+ TestImplReg(r4, Size); \
+ TestImplReg(r5, Size); \
+ TestImplReg(r6, Size); \
+ TestImplReg(r7, Size); \
+ TestImplReg(r8, Size); \
+ TestImplReg(r10, Size); \
+ TestImplReg(r11, Size); \
+ TestImplReg(r12, Size); \
+ TestImplReg(r13, Size); \
+ TestImplReg(r14, Size); \
+ TestImplReg(r15, Size); \
+ } while (0)
+ // Operand sizes under test.
+ TestImpl(8);
+ TestImpl(16);
+ TestImpl(32);
+ // Helper macros are local to this test.
+#undef TestImpl
+#undef TestImplAddr
+#undef TestImplReg
+}
+
+// Verifies that notl inverts every bit of a 32-bit value held in each GPR
+// alias listed below.
+TEST_F(AssemblerX8664Test, Not) {
+  // Same input for every register.
+  static constexpr uint32_t Value = 0xFF00A543;
+
+// Loads Value into Reg, applies notl and compares the register with ~Value.
+#define TestImpl(Reg) \
+  do { \
+    __ mov(IceType_i32, Encoded_GPR_##Reg(), Immediate(Value)); \
+    __ notl(Encoded_GPR_##Reg()); \
+ \
+    AssembledTest test = assemble(); \
+    test.run(); \
+ \
+    ASSERT_EQ(~Value, test.Reg()) << "(" #Reg ")"; \
+    reset(); \
+  } while (0)
+
+  TestImpl(r1);
+  TestImpl(r2);
+  TestImpl(r3);
+  TestImpl(r4);
+  TestImpl(r5);
+  TestImpl(r6);
+  TestImpl(r7);
+  TestImpl(r8);
+  TestImpl(r10);
+  TestImpl(r11);
+  TestImpl(r12);
+  TestImpl(r13);
+  TestImpl(r14);
+  TestImpl(r15);
+
+#undef TestImpl
+}
+
+// Verifies that a 32-bit bswap reverses the byte order of the value held in
+// each GPR alias listed below.
+TEST_F(AssemblerX8664Test, Bswap) {
+  // Input and its byte-reversed counterpart, shared by every register.
+  static constexpr uint32_t Value = 0xFF00A543;
+  static constexpr uint32_t Expected = 0x43A500FF;
+
+// Loads Value into Reg, swaps its bytes and compares the register with
+// Expected.
+#define TestImpl(Reg) \
+  do { \
+    __ mov(IceType_i32, Encoded_GPR_##Reg(), Immediate(Value)); \
+    __ bswap(IceType_i32, Encoded_GPR_##Reg()); \
+ \
+    AssembledTest test = assemble(); \
+    test.run(); \
+ \
+    ASSERT_EQ(Expected, test.Reg()) << "(" #Reg ")"; \
+    reset(); \
+  } while (0)
+
+  TestImpl(r1);
+  TestImpl(r2);
+  TestImpl(r3);
+  TestImpl(r4);
+  TestImpl(r5);
+  TestImpl(r6);
+  TestImpl(r7);
+  TestImpl(r8);
+  TestImpl(r10);
+  TestImpl(r11);
+  TestImpl(r12);
+  TestImpl(r13);
+  TestImpl(r14);
+  TestImpl(r15);
+
+#undef TestImpl
+}
+
+// Verifies bt with a register bit base and a register bit offset. The tested
+// bit is captured from the flags with setcc(Br_b) into al and compared with the
+// same bit computed on the host.
+TEST_F(AssemblerX8664Test, Bt) {
+// BaseReg receives BaseValue and IndexReg receives BitNumber; the expected
+// result is 1 when bit BitNumber of BaseValue is set and 0 otherwise.
+#define TestImpl(BaseReg, BaseValue, IndexReg, BitNumber) \
+  do { \
+    static constexpr char TestString[] = \
+        "(" #BaseReg ", " #BaseValue ", " #IndexReg ", " #BitNumber ")"; \
+    static constexpr uint32_t Expected = \
+        ((BaseValue) & (1u << (BitNumber))) != 0; \
+ \
+    __ mov(IceType_i32, Encoded_GPR_##BaseReg(), Immediate(BaseValue)); \
+    __ mov(IceType_i32, Encoded_GPR_##IndexReg(), Immediate(BitNumber)); \
+    __ bt(Encoded_GPR_##BaseReg(), Encoded_GPR_##IndexReg()); \
+    __ setcc(Cond::Br_b, ByteRegister::Encoded_Reg_al); \
+    __ And(IceType_i32, Encoded_GPR_eax(), Immediate(0xFFu)); \
+ \
+    AssembledTest test = assemble(); \
+    test.run(); \
+ \
+    ASSERT_EQ(Expected, test.eax()) << TestString; \
+    reset(); \
+  } while (0)
+
+  TestImpl(r1, 0x08000000, r2, 27u);
+  TestImpl(r2, 0x08000000, r3, 23u);
+  TestImpl(r3, 0x00000000, r4, 1u);
+  TestImpl(r4, 0x08000300, r5, 9u);
+  TestImpl(r5, 0x08000300, r6, 10u);
+  TestImpl(r6, 0x7FFFEFFF, r7, 13u);
+  TestImpl(r7, 0x08000000, r8, 27u);
+  TestImpl(r8, 0x08000000, r10, 23u);
+  TestImpl(r10, 0x00000000, r11, 1u);
+  TestImpl(r11, 0x08000300, r12, 9u);
+  TestImpl(r12, 0x08000300, r13, 10u);
+  TestImpl(r13, 0x7FFFEFFF, r14, 13u);
+  TestImpl(r14, 0x08000000, r15, 27u);
+  TestImpl(r15, 0x08000000, r1, 23u);
+
+#undef TestImpl
+}
+
+// Compile-time reference implementation of the bsf/bsr bit scans.
+//
+// Template parameters:
+//   Value - the value to scan.
+//   Bits  - operand width of the scan; must be 16 or 32.
+//
+// Only the low Bits bits of Value take part in the scan, so a 16-bit scan of a
+// value whose set bits all lie above bit 15 reports NoBitSet instead of
+// walking outside the operand.
+//
+// Members:
+//   NoBitSet - sentinel (all ones in ValueType) meaning no bit was found.
+//   bsf      - index of the lowest set bit, or NoBitSet.
+//   bsr      - index of the highest set bit, or NoBitSet.
+template <uint32_t Value, uint32_t Bits> class BitScanHelper {
+  BitScanHelper() = delete;
+
+public:
+  static_assert(Bits == 16 || Bits == 32, "Bits must be 16 or 32");
+  using ValueType =
+      typename std::conditional<Bits == 16, uint16_t, uint32_t>::type;
+
+private:
+  // Value narrowed to the operand width being scanned.
+  static constexpr uint32_t Scanned =
+      Value & (Bits == 16 ? 0x0000FFFFu : 0xFFFFFFFFu);
+
+  // Walks from Index towards higher (Forward) or lower (!Forward) bit
+  // positions until a set bit is found. Written as a single return statement
+  // so that it remains a valid C++11 constexpr function. The recursion always
+  // terminates: it is only entered when Scanned has a bit set inside the
+  // operand width.
+  static constexpr ValueType BitIndex(bool Forward, ValueType Index) {
+    return (Scanned == 0)
+               ? BitScanHelper<Value, Bits>::NoBitSet
+               : (Scanned & (1u << Index)
+                      ? Index
+                      : BitIndex(Forward, (Forward ? Index + 1 : Index - 1)));
+  }
+
+public:
+  static constexpr ValueType NoBitSet = static_cast<ValueType>(-1);
+  static constexpr ValueType bsf = BitIndex(/*Forward*/ true, /*Index=*/0);
+  static constexpr ValueType bsr =
+      BitIndex(/*Forward*/ false, /*Index=*/Bits - 1);
+};
+
+// Runs bsf and bsr with both register and memory sources, for 16- and 32-bit
+// operands, and compares the outcome with BitScanHelper. The zero flag is
+// captured with setcc(Br_e) into a scratch dword; the destination register is
+// only checked when the helper says a bit was found.
+TEST_F(AssemblerX8664Test, BitScanOperations) {
+// Register source: Src is loaded with Value1 and scanned into Dst.
+#define TestImplRegReg(Inst, Dst, Src, Value1, Size) \
+  do { \
+    static constexpr char TestString[] = \
+        "(" #Inst ", " #Dst ", " #Src ", " #Value1 ", " #Size ")"; \
+    static constexpr uint32_t Expected = BitScanHelper<Value1, Size>::Inst; \
+    const uint32_t ZeroFlag = allocateDword(); \
+    __ mov(IceType_i##Size, Encoded_GPR_##Src(), Immediate(Value1)); \
+    __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), Encoded_GPR_##Src()); \
+    __ setcc(Cond::Br_e, dwordAddress(ZeroFlag)); \
+ \
+    AssembledTest test = assemble(); \
+    test.setDwordTo(ZeroFlag, 0u); \
+    test.run(); \
+ \
+    ASSERT_EQ((Expected == BitScanHelper<Value1, Size>::NoBitSet), \
+              test.contentsOfDword(ZeroFlag)) \
+        << TestString; \
+    if (Expected != BitScanHelper<Value1, Size>::NoBitSet) { \
+      ASSERT_EQ(Expected, test.Dst()) << TestString; \
+    } \
+    reset(); \
+  } while (0)
+
+// Memory source: a scratch dword is seeded with Value1 and scanned into Dst.
+#define TestImplRegAddr(Inst, Dst, Value1, Size) \
+  do { \
+    static constexpr char TestString[] = \
+        "(" #Inst ", " #Dst ", Addr, " #Value1 ", " #Size ")"; \
+    static constexpr uint32_t Expected = BitScanHelper<Value1, Size>::Inst; \
+    const uint32_t T0 = allocateDword(); \
+    const uint32_t ZeroFlag = allocateDword(); \
+    __ Inst(IceType_i##Size, Encoded_GPR_##Dst(), dwordAddress(T0)); \
+    __ setcc(Cond::Br_e, dwordAddress(ZeroFlag)); \
+ \
+    AssembledTest test = assemble(); \
+    test.setDwordTo(T0, Value1); \
+    test.setDwordTo(ZeroFlag, 0u); \
+    test.run(); \
+ \
+    ASSERT_EQ((Expected == BitScanHelper<Value1, Size>::NoBitSet), \
+              test.contentsOfDword(ZeroFlag)) \
+        << TestString; \
+    if (Expected != BitScanHelper<Value1, Size>::NoBitSet) { \
+      ASSERT_EQ(Expected, test.Dst()) << TestString; \
+    } \
+    reset(); \
+  } while (0)
+
+// Both instructions, each with a register and a memory source. The last line
+// used to repeat bsf, which left bsr with a memory source untested.
+#define TestImplSize(Dst, Src, Value1, Size) \
+  do { \
+    TestImplRegReg(bsf, Dst, Src, Value1, Size); \
+    TestImplRegAddr(bsf, Dst, Value1, Size); \
+    TestImplRegReg(bsr, Dst, Src, Value1, Size); \
+    TestImplRegAddr(bsr, Dst, Value1, Size); \
+  } while (0)
+
+#define TestImplValue(Dst, Src, Value1) \
+  do { \
+    TestImplSize(Dst, Src, Value1, 16); \
+    TestImplSize(Dst, Src, Value1, 32); \
+  } while (0)
+
+// Inputs cover: lowest and highest bit set, no bit set, and set bits away from
+// either end. Every non-zero input has at least one bit set in its low 16
+// bits, which the 16-bit cases rely on.
+#define TestImpl(Dst, Src) \
+  do { \
+    TestImplValue(Dst, Src, 0x80000001); \
+    TestImplValue(Dst, Src, 0x00000000); \
+    TestImplValue(Dst, Src, 0x80001000); \
+    TestImplValue(Dst, Src, 0x00FFFF00); \
+  } while (0)
+
+  TestImpl(r1, r2);
+  TestImpl(r2, r3);
+  TestImpl(r3, r4);
+  TestImpl(r4, r5);
+  TestImpl(r5, r6);
+  TestImpl(r6, r7);
+  TestImpl(r7, r8);
+  TestImpl(r8, r10);
+  TestImpl(r10, r11);
+  TestImpl(r11, r12);
+  TestImpl(r12, r13);
+  TestImpl(r13, r14);
+  TestImpl(r14, r15);
+  TestImpl(r15, r1);
+
+#undef TestImpl
+#undef TestImplValue
+#undef TestImplSize
+#undef TestImplRegAddr
+#undef TestImplRegReg
+}
+
+// Checks the exact byte sequence emitted by nop(Size) for sizes 1 through 8.
+TEST_F(AssemblerX8664LowLevelTest, Nop) {
+// Size is the requested nop length; the variadic arguments are the expected
+// bytes. The macro deliberately has no trailing semicolon after while (0), so
+// each use expands to exactly one statement.
+#define TestImpl(Size, ...) \
+  do { \
+    static constexpr char TestString[] = "(" #Size ", " #__VA_ARGS__ ")"; \
+    __ nop(Size); \
+    ASSERT_EQ(Size##u, codeBytesSize()) << TestString; \
+    ASSERT_TRUE(verifyBytes<Size>(codeBytes(), __VA_ARGS__)) << TestString; \
+    reset(); \
+  } while (0)
+
+  TestImpl(1, 0x90);
+  TestImpl(2, 0x66, 0x90);
+  TestImpl(3, 0x0F, 0x1F, 0x00);
+  TestImpl(4, 0x0F, 0x1F, 0x40, 0x00);
+  TestImpl(5, 0x0F, 0x1F, 0x44, 0x00, 0x00);
+  TestImpl(6, 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00);
+  TestImpl(7, 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00);
+  TestImpl(8, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
+
+#undef TestImpl
+}
+
+// Checks that int3 is emitted as the single byte 0xCC.
+TEST_F(AssemblerX8664LowLevelTest, Int3) {
+  __ int3();
+  static constexpr uint32_t ByteCount = 1;
+  ASSERT_EQ(ByteCount, codeBytesSize());
+  // The result of verifyBytes must be asserted; otherwise a byte mismatch
+  // would not fail the test.
+  ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), 0xCC));
+}
+
+// Checks that hlt is emitted as the single byte 0xF4.
+TEST_F(AssemblerX8664LowLevelTest, Hlt) {
+  __ hlt();
+  static constexpr uint32_t ByteCount = 1;
+  ASSERT_EQ(ByteCount, codeBytesSize());
+  // The result of verifyBytes must be asserted; otherwise a byte mismatch
+  // would not fail the test.
+  ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), 0xF4));
+}
+
+// Checks that ud2 is emitted as the two bytes 0x0F 0x0B.
+TEST_F(AssemblerX8664LowLevelTest, Ud2) {
+  __ ud2();
+  static constexpr uint32_t ByteCount = 2;
+  ASSERT_EQ(ByteCount, codeBytesSize());
+  // The result of verifyBytes must be asserted; otherwise a byte mismatch
+  // would not fail the test.
+  ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), 0x0F, 0x0B));
+}
+
+// Exercises jmp to a label (forward and backward, near and far) and jmp
+// through a register. Every path that must not be executed is filled with hlt.
+TEST_F(AssemblerX8664Test, Jmp) {
+// TestImplReg uses jmp(Label), so jmp(Label) needs to be tested before it.
+//
+// Control flow emitted by TestImplAddr: entry -> ForwardJmp -> BackwardJmp ->
+// Done. All three jumps use the kind selected by Near (kNearJump or kFarJump);
+// the jump at ForwardJmp previously pasted k##NearJump, which always produced
+// kNearJump regardless of the argument.
+//
+// NOTE(review): this macro only emits code; it never calls assemble()/run()/
+// reset(). The emitted jumps therefore appear to run as a prefix of the first
+// TestImplReg case -- confirm that this is intended.
+#define TestImplAddr(Near) \
+  do { \
+    Label ForwardJmp; \
+    Label BackwardJmp; \
+    Label Done; \
+ \
+    __ jmp(&ForwardJmp, AssemblerX8664::k##Near##Jump); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ bind(&BackwardJmp); \
+    __ jmp(&Done, AssemblerX8664::k##Near##Jump); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ bind(&ForwardJmp); \
+    __ jmp(&BackwardJmp, AssemblerX8664::k##Near##Jump); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ bind(&Done); \
+  } while (0)
+
+// Register form: the call pushes a return address, the code at the call target
+// pops it into Dst and jumps through Dst, which lands on the jmp to Done that
+// follows the call.
+// NOTE(review): Immediate(16) is presumably chosen so that the call lands on
+// the popl after the first run of hlt -- verify against call(Immediate).
+#define TestImplReg(Dst) \
+  do { \
+    __ call(Immediate(16)); \
+    Label Done; \
+    __ jmp(&Done, AssemblerX8664::kNearJump); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ popl(Encoded_GPR_##Dst()); \
+    __ jmp(Encoded_GPR_##Dst()); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ hlt(); \
+    __ bind(&Done); \
+ \
+    AssembledTest test = assemble(); \
+    test.run(); \
+ \
+    reset(); \
+  } while (0)
+
+  TestImplAddr(Near);
+  TestImplAddr(Far);
+
+  TestImplReg(r1);
+  TestImplReg(r2);
+  TestImplReg(r3);
+  TestImplReg(r4);
+  TestImplReg(r5);
+  TestImplReg(r6);
+  TestImplReg(r7);
+  TestImplReg(r8);
+  TestImplReg(r10);
+  TestImplReg(r11);
+  TestImplReg(r12);
+  TestImplReg(r13);
+  TestImplReg(r14);
+  TestImplReg(r15);
+
+#undef TestImplReg
+#undef TestImplAddr
+}
+
+// Checks that mfence is emitted as the three bytes 0x0F 0xAE 0xF0.
+TEST_F(AssemblerX8664LowLevelTest, Mfence) {
+  __ mfence();
+
+  static constexpr uint8_t ByteCount = 3;
+  ASSERT_EQ(ByteCount, codeBytesSize());
+  // The result of verifyBytes must be asserted; otherwise a byte mismatch
+  // would not fail the test.
+  ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), 0x0F, 0xAE, 0xF0));
+}
+
+// Checks that lock is emitted as the single prefix byte 0xF0.
+TEST_F(AssemblerX8664LowLevelTest, Lock) {
+  __ lock();
+
+  static constexpr uint8_t ByteCount = 1;
+  ASSERT_EQ(ByteCount, codeBytesSize());
+  // The result of verifyBytes must be asserted; otherwise a byte mismatch
+  // would not fail the test.
+  ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), 0xF0));
+}
+
+// Verifies xchg between a memory dword and each GPR alias for 8-, 16- and
+// 32-bit operands: afterwards the register must hold the old memory value and
+// the memory must hold the old register value.
+TEST_F(AssemblerX8664Test, Xchg) {
+  static constexpr uint32_t Mask8 = 0x000000FF;
+  static constexpr uint32_t Mask16 = 0x0000FFFF;
+  static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+// MemValue seeds the memory operand and RegValue seeds Reg; both are compared
+// after masking to Bits bits. The register is masked in the emitted code so
+// that bits above the operand width do not affect the comparison.
+#define TestImplAddrReg(MemValue, Reg, RegValue, Bits) \
+  do { \
+    static constexpr char TestString[] = \
+        "(" #MemValue ", " #Reg ", " #RegValue ", " #Bits ")"; \
+    const uint32_t V0 = (MemValue) & Mask##Bits; \
+    const uint32_t V1 = (RegValue) & Mask##Bits; \
+    const uint32_t T0 = allocateDword(); \
+ \
+    __ mov(IceType_i##Bits, Encoded_GPR_##Reg(), Immediate(RegValue)); \
+    __ xchg(IceType_i##Bits, dwordAddress(T0), Encoded_GPR_##Reg()); \
+    __ And(IceType_i32, Encoded_GPR_##Reg(), Immediate(Mask##Bits)); \
+ \
+    AssembledTest test = assemble(); \
+    test.setDwordTo(T0, V0); \
+    test.run(); \
+ \
+    ASSERT_EQ(V0, test.Reg()) << TestString; \
+    ASSERT_EQ(V1, test.contentsOfDword(T0)) << TestString; \
+    reset(); \
+  } while (0)
+
+#define TestImplSize(Reg, Bits) \
+  do { \
+    TestImplAddrReg(0xa2b34567, Reg, 0x0507ddee, Bits); \
+  } while (0)
+
+#define TestImpl(Reg) \
+  do { \
+    TestImplSize(Reg, 8); \
+    TestImplSize(Reg, 16); \
+    TestImplSize(Reg, 32); \
+  } while (0)
+
+  TestImpl(r1);
+  TestImpl(r2);
+  TestImpl(r3);
+  TestImpl(r4);
+  TestImpl(r5);
+  TestImpl(r6);
+  TestImpl(r7);
+  TestImpl(r8);
+  TestImpl(r10);
+  TestImpl(r11);
+  TestImpl(r12);
+  TestImpl(r13);
+  TestImpl(r14);
+  TestImpl(r15);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplAddrReg
+}
+
+// Verifies xadd between a memory dword and each GPR alias, with and without
+// the lock prefix, for 8-, 16- and 32-bit operands: afterwards the register
+// must hold the old memory value and the memory must hold the sum.
+TEST_F(AssemblerX8664Test, Xadd) {
+  static constexpr bool NotLocked = false;
+  static constexpr bool Locked = true;
+
+  static constexpr uint32_t Mask8 = 0x000000FF;
+  static constexpr uint32_t Mask16 = 0x0000FFFF;
+  static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+// Value0 seeds the memory operand and Value1 seeds Dst1. TestString includes
+// LockedOrNot so that a failure identifies which of the two variants broke
+// (the Cmpxchg test reports it the same way).
+#define TestImplAddrReg(Value0, Dst1, Value1, LockedOrNot, Size) \
+  do { \
+    static constexpr char TestString[] = \
+        "(" #Value0 ", " #Dst1 ", " #Value1 ", " #LockedOrNot ", " #Size ")"; \
+    const uint32_t T0 = allocateDword(); \
+    const uint32_t V0 = (Value0) & Mask##Size; \
+    const uint32_t V1 = (Value1) & Mask##Size; \
+ \
+    __ mov(IceType_i##Size, Encoded_GPR_##Dst1(), Immediate(Value1)); \
+    __ xadd(IceType_i##Size, dwordAddress(T0), Encoded_GPR_##Dst1(), \
+            LockedOrNot); \
+    __ And(IceType_i32, Encoded_GPR_##Dst1(), Immediate(Mask##Size)); \
+ \
+    AssembledTest test = assemble(); \
+    test.setDwordTo(T0, V0); \
+    test.run(); \
+ \
+    ASSERT_EQ(V0, test.Dst1()) << TestString; \
+    ASSERT_EQ(Mask##Size & (V1 + V0), test.contentsOfDword(T0)) \
+        << TestString; \
+    reset(); \
+  } while (0)
+
+#define TestImplSize(Dst1, Size) \
+  do { \
+    TestImplAddrReg(0xa2b34567, Dst1, 0x0507ddee, NotLocked, Size); \
+    TestImplAddrReg(0xa2b34567, Dst1, 0x0507ddee, Locked, Size); \
+  } while (0)
+
+#define TestImpl(Dst1) \
+  do { \
+    TestImplSize(Dst1, 8); \
+    TestImplSize(Dst1, 16); \
+    TestImplSize(Dst1, 32); \
+  } while (0)
+
+  TestImpl(r1);
+  TestImpl(r2);
+  TestImpl(r3);
+  TestImpl(r4);
+  TestImpl(r5);
+  TestImpl(r6);
+  TestImpl(r7);
+  TestImpl(r8);
+  TestImpl(r10);
+  TestImpl(r11);
+  TestImpl(r12);
+  TestImpl(r13);
+  TestImpl(r14);
+  TestImpl(r15);
+
+#undef TestImpl
+#undef TestImplSize
+#undef TestImplAddrReg
+}
+
+// Byte-level check of xadd: for each operand size the locked form must be the
+// unlocked encoding plus a single 0xF0 lock prefix byte.
+TEST_F(AssemblerX8664LowLevelTest, Xadd) {
+  static constexpr bool Locked = true;
+  static constexpr bool NotLocked = false;
+
+  // 8-bit operand.
+  {
+    static constexpr uint8_t ByteCountNotLocked8 = 8;
+    static constexpr uint8_t ByteCountLocked8 = 1 + ByteCountNotLocked8;
+
+    __ xadd(IceType_i8, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+            NotLocked);
+    ASSERT_EQ(ByteCountNotLocked8, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountNotLocked8>(
+        codeBytes(), 0x44, 0x0F, 0xC0, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+
+    __ xadd(IceType_i8, Address::Absolute(0x1FF00), Encoded_GPR_r14(), Locked);
+    ASSERT_EQ(ByteCountLocked8, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountLocked8>(
+        codeBytes(), 0xF0, 0x44, 0x0F, 0xC0, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+  }
+
+  // 16-bit operand: the expected lock prefix comes after the leading 0x66.
+  {
+    static constexpr uint8_t ByteCountNotLocked16 = 9;
+    static constexpr uint8_t ByteCountLocked16 = 1 + ByteCountNotLocked16;
+
+    __ xadd(IceType_i16, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+            NotLocked);
+    ASSERT_EQ(ByteCountNotLocked16, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountNotLocked16>(
+        codeBytes(), 0x66, 0x44, 0x0F, 0xC1, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+
+    __ xadd(IceType_i16, Address::Absolute(0x1FF00), Encoded_GPR_r14(), Locked);
+    ASSERT_EQ(ByteCountLocked16, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountLocked16>(
+        codeBytes(), 0x66, 0xF0, 0x44, 0x0F, 0xC1, 0x35, 0x00, 0xFF, 0x01,
+        0x00));
+    reset();
+  }
+
+  // 32-bit operand.
+  {
+    static constexpr uint8_t ByteCountNotLocked32 = 8;
+    static constexpr uint8_t ByteCountLocked32 = 1 + ByteCountNotLocked32;
+
+    __ xadd(IceType_i32, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+            NotLocked);
+    ASSERT_EQ(ByteCountNotLocked32, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountNotLocked32>(
+        codeBytes(), 0x44, 0x0F, 0xC1, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+
+    __ xadd(IceType_i32, Address::Absolute(0x1FF00), Encoded_GPR_r14(), Locked);
+    ASSERT_EQ(ByteCountLocked32, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountLocked32>(
+        codeBytes(), 0xF0, 0x44, 0x0F, 0xC1, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+  }
+}
+
+// Checks that emitSegmentOverride emits exactly the one prefix byte it is
+// given, for each byte value listed below.
+TEST_F(AssemblerX8664LowLevelTest, EmitSegmentOverride) {
+  // Every case is expected to produce a single byte.
+  static constexpr uint8_t ByteCount = 1;
+
+#define TestImpl(PrefixByte) \
+  do { \
+    __ emitSegmentOverride(PrefixByte); \
+    ASSERT_EQ(ByteCount, codeBytesSize()) << PrefixByte; \
+    ASSERT_TRUE(verifyBytes<ByteCount>(codeBytes(), PrefixByte)); \
+    reset(); \
+  } while (0)
+
+  TestImpl(0x26);
+  TestImpl(0x2E);
+  TestImpl(0x36);
+  TestImpl(0x3E);
+  TestImpl(0x64);
+  TestImpl(0x65);
+  TestImpl(0x66);
+  TestImpl(0x67);
+
+#undef TestImpl
+}
+
+// Verifies cmpxchg8b, locked and unlocked, for both outcomes: when the value
+// in memory equals edx:eax the memory receives ecx:ebx and ZF is set;
+// otherwise edx:eax receives the memory value and ZF is clear.
+TEST_F(AssemblerX8664Test, Cmpxchg8b) {
+  static constexpr bool Locked = true;
+  static constexpr bool NotLocked = false;
+
+// Comparand is loaded into edx:eax, Replacement into ecx:ebx, and MemValue
+// seeds the qword operand. ZeroFlag starts as 0xFF so that both setcc results
+// (0 and 1) are distinguishable from the initial value.
+#define TestImpl(Comparand, Replacement, MemValue, LockedOrNot) \
+  do { \
+    static constexpr char TestString[] = \
+        "(" #Comparand ", " #Replacement ", " #MemValue ", " #LockedOrNot ")"; \
+    static constexpr uint64_t V0 = MemValue; \
+    const uint32_t T0 = allocateQword(); \
+    const uint32_t ZeroFlag = allocateDword(); \
+ \
+    __ mov(IceType_i32, Encoded_GPR_eax(), \
+           Immediate(uint64_t(Comparand) & 0xFFFFFFFF)); \
+    __ mov(IceType_i32, Encoded_GPR_edx(), \
+           Immediate(uint64_t(Comparand) >> 32)); \
+    __ mov(IceType_i32, Encoded_GPR_ebx(), \
+           Immediate(uint64_t(Replacement) & 0xFFFFFFFF)); \
+    __ mov(IceType_i32, Encoded_GPR_ecx(), \
+           Immediate(uint64_t(Replacement) >> 32)); \
+    __ cmpxchg8b(dwordAddress(T0), LockedOrNot); \
+    __ setcc(Cond::Br_e, dwordAddress(ZeroFlag)); \
+ \
+    AssembledTest test = assemble(); \
+    test.setQwordTo(T0, V0); \
+    test.setDwordTo(ZeroFlag, uint32_t(0xFF)); \
+    test.run(); \
+ \
+    if (V0 == (Comparand)) { \
+      ASSERT_EQ(uint64_t(Replacement), test.contentsOfQword(T0)) \
+          << TestString; \
+      ASSERT_EQ(1u, test.contentsOfDword(ZeroFlag)) << TestString; \
+    } else { \
+      ASSERT_EQ(uint64_t(MemValue) & 0xFFFFFFFF, test.eax()) << TestString; \
+      ASSERT_EQ((uint64_t(MemValue) >> 32) & 0xFFFFFFFF, test.edx()) \
+          << TestString; \
+      ASSERT_EQ(0u, test.contentsOfDword(ZeroFlag)) << TestString; \
+    } \
+    reset(); \
+  } while (0)
+
+  // Memory matches the comparand: the exchange happens.
+  TestImpl(0x98987676543210ull, 0x1, 0x98987676543210ull, NotLocked);
+  TestImpl(0x98987676543210ull, 0x1, 0x98987676543210ull, Locked);
+  // Memory differs from the comparand: edx:eax is reloaded instead.
+  TestImpl(0x98987676543210ull, 0x1, 0x98987676543211ull, NotLocked);
+  TestImpl(0x98987676543210ull, 0x1, 0x98987676543211ull, Locked);
+
+#undef TestImpl
+}
+
+// Byte-level check of cmpxchg8b: the locked form must be the unlocked encoding
+// preceded by a single 0xF0 lock prefix byte.
+TEST_F(AssemblerX8664LowLevelTest, Cmpxchg8b) {
+  static constexpr bool Locked = true;
+  static constexpr bool NotLocked = false;
+  static constexpr uint8_t ByteCountNotLocked = 7;
+  static constexpr uint8_t ByteCountLocked = 1 + ByteCountNotLocked;
+
+  // Unlocked encoding.
+  __ cmpxchg8b(Address::Absolute(0x1FF00), NotLocked);
+  ASSERT_EQ(ByteCountNotLocked, codeBytesSize());
+  ASSERT_TRUE(verifyBytes<ByteCountNotLocked>(
+      codeBytes(), 0x0F, 0xC7, 0x0D, 0x00, 0xFF, 0x01, 0x00));
+  reset();
+
+  // Locked encoding.
+  __ cmpxchg8b(Address::Absolute(0x1FF00), Locked);
+  ASSERT_EQ(ByteCountLocked, codeBytesSize());
+  ASSERT_TRUE(verifyBytes<ByteCountLocked>(
+      codeBytes(), 0xF0, 0x0F, 0xC7, 0x0D, 0x00, 0xFF, 0x01, 0x00));
+  reset();
+}
+
+// Verifies cmpxchg against a memory operand, locked and unlocked, for 8-, 16-
+// and 32-bit operands and for both outcomes: when memory equals the
+// accumulator the memory receives the source register and ZF is set;
+// otherwise eax receives the memory value and ZF is clear.
+TEST_F(AssemblerX8664Test, Cmpxchg) {
+  static constexpr bool Locked = true;
+  static constexpr bool NotLocked = false;
+
+  static constexpr uint32_t Mask8 = 0x000000FF;
+  static constexpr uint32_t Mask16 = 0x0000FFFF;
+  static constexpr uint32_t Mask32 = 0xFFFFFFFF;
+
+// Comparand is loaded into eax, Replacement into Reg, and MemValue seeds the
+// memory operand; all three are masked to Bits bits. ZeroFlag starts as 0xFF
+// so that both setcc results (0 and 1) are distinguishable from it.
+#define TestImplAddrReg(Comparand, Reg, Replacement, MemValue, LockedOrNot, \
+                        Bits) \
+  do { \
+    static constexpr char TestString[] = \
+        "(" #Comparand ", " #Reg ", " #Replacement ", " #MemValue \
+        ", " #LockedOrNot ", " #Bits ")"; \
+    static constexpr uint32_t V0 = (MemValue) & Mask##Bits; \
+    const uint32_t T0 = allocateDword(); \
+    const uint32_t ZeroFlag = allocateDword(); \
+ \
+    __ mov(IceType_i##Bits, Encoded_GPR_eax(), \
+           Immediate((Comparand) & Mask##Bits)); \
+    __ mov(IceType_i##Bits, Encoded_GPR_##Reg(), \
+           Immediate((Replacement) & Mask##Bits)); \
+    __ cmpxchg(IceType_i##Bits, dwordAddress(T0), Encoded_GPR_##Reg(), \
+               LockedOrNot); \
+    __ setcc(Cond::Br_e, dwordAddress(ZeroFlag)); \
+ \
+    AssembledTest test = assemble(); \
+    test.setDwordTo(T0, V0); \
+    test.setDwordTo(ZeroFlag, uint32_t(0xFF)); \
+    test.run(); \
+ \
+    if (V0 == (Mask##Bits & (Comparand))) { \
+      ASSERT_EQ(uint32_t((Replacement) & Mask##Bits), \
+                test.contentsOfDword(T0)) \
+          << TestString; \
+      ASSERT_EQ(1u, test.contentsOfDword(ZeroFlag)) << TestString; \
+    } else { \
+      ASSERT_EQ(uint32_t((MemValue) & Mask##Bits), test.eax()) << TestString; \
+      ASSERT_EQ(0u, test.contentsOfDword(ZeroFlag)) << TestString; \
+    } \
+    reset(); \
+  } while (0)
+
+// One value triple at every operand width.
+#define TestImplValue(Comparand, Reg, Replacement, MemValue, LockedOrNot) \
+  do { \
+    TestImplAddrReg(Comparand, Reg, Replacement, MemValue, LockedOrNot, 8); \
+    TestImplAddrReg(Comparand, Reg, Replacement, MemValue, LockedOrNot, 16); \
+    TestImplAddrReg(Comparand, Reg, Replacement, MemValue, LockedOrNot, 32); \
+  } while (0)
+
+// First triple: the comparand equals the memory value. Second triple: it
+// differs.
+#define TestImpl(Reg, LockedOrNot) \
+  do { \
+    TestImplValue(0xFFFFFFFF, Reg, 0x1, 0xFFFFFFFF, LockedOrNot); \
+    TestImplValue(0x0FFF0F0F, Reg, 0x1, 0xFFFFFFFF, LockedOrNot); \
+  } while (0)
+
+// Locked variant first, then the unlocked one, for a single source register.
+#define TestImplLockedAndNot(Reg) \
+  do { \
+    TestImpl(Reg, Locked); \
+    TestImpl(Reg, NotLocked); \
+  } while (0)
+
+  TestImplLockedAndNot(r2);
+  TestImplLockedAndNot(r3);
+  TestImplLockedAndNot(r4);
+  TestImplLockedAndNot(r5);
+  TestImplLockedAndNot(r6);
+  TestImplLockedAndNot(r7);
+  TestImplLockedAndNot(r8);
+  TestImplLockedAndNot(r10);
+  TestImplLockedAndNot(r11);
+  TestImplLockedAndNot(r12);
+  TestImplLockedAndNot(r13);
+  TestImplLockedAndNot(r14);
+  TestImplLockedAndNot(r15);
+
+#undef TestImplLockedAndNot
+#undef TestImpl
+#undef TestImplValue
+#undef TestImplAddrReg
+}
+
+// Byte-level check of cmpxchg: for each operand size the locked form must be
+// the unlocked encoding plus a single 0xF0 lock prefix byte.
+TEST_F(AssemblerX8664LowLevelTest, Cmpxchg) {
+  static constexpr bool Locked = true;
+  static constexpr bool NotLocked = false;
+
+  // 8-bit operand.
+  {
+    static constexpr uint8_t ByteCountNotLocked8 = 8;
+    static constexpr uint8_t ByteCountLocked8 = 1 + ByteCountNotLocked8;
+
+    __ cmpxchg(IceType_i8, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+               NotLocked);
+    ASSERT_EQ(ByteCountNotLocked8, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountNotLocked8>(
+        codeBytes(), 0x44, 0x0F, 0xB0, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+
+    __ cmpxchg(IceType_i8, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+               Locked);
+    ASSERT_EQ(ByteCountLocked8, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountLocked8>(
+        codeBytes(), 0xF0, 0x44, 0x0F, 0xB0, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+  }
+
+  // 16-bit operand: the expected lock prefix comes after the leading 0x66.
+  {
+    static constexpr uint8_t ByteCountNotLocked16 = 9;
+    static constexpr uint8_t ByteCountLocked16 = 1 + ByteCountNotLocked16;
+
+    __ cmpxchg(IceType_i16, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+               NotLocked);
+    ASSERT_EQ(ByteCountNotLocked16, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountNotLocked16>(
+        codeBytes(), 0x66, 0x44, 0x0F, 0xB1, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+
+    __ cmpxchg(IceType_i16, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+               Locked);
+    ASSERT_EQ(ByteCountLocked16, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountLocked16>(
+        codeBytes(), 0x66, 0xF0, 0x44, 0x0F, 0xB1, 0x35, 0x00, 0xFF, 0x01,
+        0x00));
+    reset();
+  }
+
+  // 32-bit operand.
+  {
+    static constexpr uint8_t ByteCountNotLocked32 = 8;
+    static constexpr uint8_t ByteCountLocked32 = 1 + ByteCountNotLocked32;
+
+    __ cmpxchg(IceType_i32, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+               NotLocked);
+    ASSERT_EQ(ByteCountNotLocked32, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountNotLocked32>(
+        codeBytes(), 0x44, 0x0F, 0xB1, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+
+    __ cmpxchg(IceType_i32, Address::Absolute(0x1FF00), Encoded_GPR_r14(),
+               Locked);
+    ASSERT_EQ(ByteCountLocked32, codeBytesSize());
+    ASSERT_TRUE(verifyBytes<ByteCountLocked32>(
+        codeBytes(), 0xF0, 0x44, 0x0F, 0xB1, 0x35, 0x00, 0xFF, 0x01, 0x00));
+    reset();
+  }
+}
+
+// Verifies set1ps: after the call, each of the four 32-bit lanes of the xmm
+// register must hold the immediate. The expected 128-bit value is built from
+// two identical 64-bit halves, each being the immediate repeated twice.
+TEST_F(AssemblerX8664Test, Set1ps) {
+// XmmReg is the destination, ScratchReg is the GPR handed to set1ps and
+// Scalar is the 32-bit immediate to replicate.
+#define TestImpl(XmmReg, ScratchReg, Scalar) \
+  do { \
+    __ set1ps(Encoded_Xmm_##XmmReg(), Encoded_GPR_##ScratchReg(), \
+              Immediate(Scalar)); \
+ \
+    AssembledTest test = assemble(); \
+    test.run(); \
+ \
+    const Dqword Expected((uint64_t(Scalar) << 32) | uint32_t(Scalar), \
+                          (uint64_t(Scalar) << 32) | uint32_t(Scalar)); \
+    ASSERT_EQ(Expected, test.XmmReg<Dqword>()) \
+        << "(" #XmmReg ", " #ScratchReg ", " #Scalar ")"; \
+    reset(); \
+  } while (0)
+
+  TestImpl(xmm0, r1, 1);
+  TestImpl(xmm1, r2, 12);
+  TestImpl(xmm2, r3, 22);
+  TestImpl(xmm3, r4, 54);
+  TestImpl(xmm4, r5, 80);
+  TestImpl(xmm5, r6, 32);
+  TestImpl(xmm6, r7, 55);
+  TestImpl(xmm7, r8, 44);
+  TestImpl(xmm8, r10, 10);
+  TestImpl(xmm9, r11, 155);
+  TestImpl(xmm10, r12, 165);
+  TestImpl(xmm11, r13, 170);
+  TestImpl(xmm12, r14, 200);
+  TestImpl(xmm13, r15, 124);
+  TestImpl(xmm14, r1, 101);
+  TestImpl(xmm15, r2, 166);
+
+#undef TestImpl
+}
+
+#undef __
+
+} // end of anonymous namespace
+} // end of namespace X8664
+} // end of namespace Ice
« src/IceRegistersX8664.h ('K') | « unittest/IceAssemblerX8632Test.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698