| OLD | NEW |
| (Empty) | |
| 1 |
| 2 //===- subzero/unittest/unittest/AssemblerX8664/TestUtil.h ------*- C++ -*-===// |
| 3 // |
| 4 // The Subzero Code Generator |
| 5 // |
| 6 // This file is distributed under the University of Illinois Open Source |
| 7 // License. See LICENSE.TXT for details. |
| 8 // |
| 9 //===----------------------------------------------------------------------===// |
| 10 // |
| 11 // Utility classes for testing the X8664 Assembler. |
| 12 // |
| 13 //===----------------------------------------------------------------------===// |
| 14 |
| 15 #ifndef ASSEMBLERX8664_TESTUTIL_H_ |
| 16 #define ASSEMBLERX8664_TESTUTIL_H_ |
| 17 |
#include "IceAssemblerX8664.h"

#include "gtest/gtest.h"

#include <cassert>
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <memory>
#include <sys/mman.h>
#include <type_traits>
| 24 |
| 25 namespace Ice { |
| 26 namespace X8664 { |
| 27 namespace Test { |
| 28 |
| 29 class AssemblerX8664TestBase : public ::testing::Test { |
| 30 protected: |
| 31 using Address = AssemblerX8664::Traits::Address; |
| 32 using ByteRegister = AssemblerX8664::Traits::ByteRegister; |
| 33 using Cond = AssemblerX8664::Traits::Cond; |
| 34 using GPRRegister = AssemblerX8664::Traits::GPRRegister; |
| 35 using Traits = AssemblerX8664::Traits; |
| 36 using XmmRegister = AssemblerX8664::Traits::XmmRegister; |
| 37 |
| 38 // The following are "nicknames" for all possible GPRs in x86-64. With those, we |
| 39 // can use, e.g., |
| 40 // |
| 41 // Encoded_GPR_al() |
| 42 // |
| 43 // instead of GPRRegister::Encoded_Reg_eax for 8 bit operands. They also |
| 44 // introduce "regular" nicknames for legacy x86-32 register (e.g., eax becomes |
| 45 // r1; esp, r0). |
| 46 #define LegacyRegAliases(NewName, Name64, Name32, Name16, Name8) \ |
| 47 static constexpr GPRRegister Encoded_GPR_##NewName() { \ |
| 48 return GPRRegister::Encoded_Reg_##Name32; \ |
| 49 } \ |
| 50 static constexpr GPRRegister Encoded_GPR_##NewName##q() { \ |
| 51 return GPRRegister::Encoded_Reg_##Name32; \ |
| 52 } \ |
| 53 static constexpr GPRRegister Encoded_GPR_##NewName##d() { \ |
| 54 return GPRRegister::Encoded_Reg_##Name32; \ |
| 55 } \ |
| 56 static constexpr GPRRegister Encoded_GPR_##NewName##w() { \ |
| 57 return GPRRegister::Encoded_Reg_##Name32; \ |
| 58 } \ |
| 59 static constexpr GPRRegister Encoded_GPR_##NewName##l() { \ |
| 60 return GPRRegister::Encoded_Reg_##Name32; \ |
| 61 } \ |
| 62 static constexpr GPRRegister Encoded_GPR_##Name64() { \ |
| 63 return GPRRegister::Encoded_Reg_##Name32; \ |
| 64 } \ |
| 65 static constexpr GPRRegister Encoded_GPR_##Name32() { \ |
| 66 return GPRRegister::Encoded_Reg_##Name32; \ |
| 67 } \ |
| 68 static constexpr GPRRegister Encoded_GPR_##Name16() { \ |
| 69 return GPRRegister::Encoded_Reg_##Name32; \ |
| 70 } \ |
| 71 static constexpr GPRRegister Encoded_GPR_##Name8() { \ |
| 72 return GPRRegister::Encoded_Reg_##Name32; \ |
| 73 } |
| 74 #define NewRegAliases(Name) \ |
| 75 static constexpr GPRRegister Encoded_GPR_##Name() { \ |
| 76 return GPRRegister::Encoded_Reg_##Name##d; \ |
| 77 } \ |
| 78 static constexpr GPRRegister Encoded_GPR_##Name##q() { \ |
| 79 return GPRRegister::Encoded_Reg_##Name##d; \ |
| 80 } \ |
| 81 static constexpr GPRRegister Encoded_GPR_##Name##d() { \ |
| 82 return GPRRegister::Encoded_Reg_##Name##d; \ |
| 83 } \ |
| 84 static constexpr GPRRegister Encoded_GPR_##Name##w() { \ |
| 85 return GPRRegister::Encoded_Reg_##Name##d; \ |
| 86 } \ |
| 87 static constexpr GPRRegister Encoded_GPR_##Name##l() { \ |
| 88 return GPRRegister::Encoded_Reg_##Name##d; \ |
| 89 } |
| 90 #define XmmRegAliases(Name) \ |
| 91 static constexpr XmmRegister Encoded_Xmm_##Name() { \ |
| 92 return XmmRegister::Encoded_Reg_##Name; \ |
| 93 } |
| 94 LegacyRegAliases(r0, rsp, esp, sp, spl); |
| 95 LegacyRegAliases(r1, rax, eax, ax, al); |
| 96 LegacyRegAliases(r2, rbx, ebx, bx, bl); |
| 97 LegacyRegAliases(r3, rcx, ecx, cx, cl); |
| 98 LegacyRegAliases(r4, rdx, edx, dx, dl); |
| 99 LegacyRegAliases(r5, rbp, ebp, bp, bpl); |
| 100 LegacyRegAliases(r6, rsi, esi, si, sil); |
| 101 LegacyRegAliases(r7, rdi, edi, di, dil); |
| 102 NewRegAliases(r8); |
| 103 NewRegAliases(r9); |
| 104 NewRegAliases(r10); |
| 105 NewRegAliases(r11); |
| 106 NewRegAliases(r12); |
| 107 NewRegAliases(r13); |
| 108 NewRegAliases(r14); |
| 109 NewRegAliases(r15); |
| 110 XmmRegAliases(xmm0); |
| 111 XmmRegAliases(xmm1); |
| 112 XmmRegAliases(xmm2); |
| 113 XmmRegAliases(xmm3); |
| 114 XmmRegAliases(xmm4); |
| 115 XmmRegAliases(xmm5); |
| 116 XmmRegAliases(xmm6); |
| 117 XmmRegAliases(xmm7); |
| 118 XmmRegAliases(xmm8); |
| 119 XmmRegAliases(xmm9); |
| 120 XmmRegAliases(xmm10); |
| 121 XmmRegAliases(xmm11); |
| 122 XmmRegAliases(xmm12); |
| 123 XmmRegAliases(xmm13); |
| 124 XmmRegAliases(xmm14); |
| 125 XmmRegAliases(xmm15); |
| 126 #undef XmmRegAliases |
| 127 #undef NewRegAliases |
| 128 #undef LegacyRegAliases |
| 129 |
| 130 AssemblerX8664TestBase() { reset(); } |
| 131 |
| 132 void reset() { Assembler.reset(new AssemblerX8664()); } |
| 133 |
| 134 AssemblerX8664 *assembler() const { return Assembler.get(); } |
| 135 |
| 136 size_t codeBytesSize() const { return Assembler->getBufferView().size(); } |
| 137 |
| 138 const uint8_t *codeBytes() const { |
| 139 return static_cast<const uint8_t *>( |
| 140 static_cast<const void *>(Assembler->getBufferView().data())); |
| 141 } |
| 142 |
| 143 private: |
| 144 std::unique_ptr<AssemblerX8664> Assembler; |
| 145 }; |
| 146 |
| 147 // __ is a helper macro. It allows test cases to emit X8664 assembly |
| 148 // instructions with |
| 149 // |
| 150 // __ mov(GPRRegister::Reg_Eax, 1); |
| 151 // __ ret(); |
| 152 // |
| 153 // and so on. The idea of having this was "stolen" from dart's unit tests. |
| 154 #define __ (this->assembler())-> |
| 155 |
// AssemblerX8664LowLevelTest verifies that the "basic" instructions the tests
// rely on are encoded correctly. Therefore, instead of executing the assembled
// code, these tests will verify that the assembled bytes are sane.
| 159 class AssemblerX8664LowLevelTest : public AssemblerX8664TestBase { |
| 160 protected: |
| 161 // verifyBytes is a template helper that takes a Buffer, and a variable number |
| 162 // of bytes. As the name indicates, it is used to verify the bytes for an |
| 163 // instruction encoding. |
| 164 template <int N, int I> static bool verifyBytes(const uint8_t *) { |
| 165 static_assert(I == N, "Invalid template instantiation."); |
| 166 return true; |
| 167 } |
| 168 |
| 169 template <int N, int I = 0, typename... Args> |
| 170 static bool verifyBytes(const uint8_t *Buffer, uint8_t Byte, |
| 171 Args... OtherBytes) { |
| 172 static_assert(I < N, "Invalid template instantiation."); |
| 173 EXPECT_EQ(Byte, Buffer[I]) << "Byte " << (I + 1) << " of " << N; |
| 174 return verifyBytes<N, I + 1>(Buffer, OtherBytes...) && Buffer[I] == Byte; |
| 175 } |
| 176 }; |
| 177 |
| 178 // After these tests we should have a sane environment; we know the following |
| 179 // work: |
| 180 // |
| 181 // (*) zeroing eax, ebx, ecx, edx, edi, and esi; |
| 182 // (*) call $4 instruction (used for ip materialization); |
| 183 // (*) register push and pop; |
| 184 // (*) cmp reg, reg; and |
| 185 // (*) returning from functions. |
| 186 // |
| 187 // We can now dive into testing each emitting method in AssemblerX8664. Each |
| 188 // test will emit some instructions for performing the test. The assembled |
| 189 // instructions will operate in a "safe" environment. All x86-64 registers are |
| 190 // spilled to the program stack, and the registers are then zeroed out, with the |
| 191 // exception of %esp and %r9. |
| 192 // |
| 193 // The jitted code and the unittest code will share the same stack. Therefore, |
| 194 // test harnesses need to ensure it does not leave anything it pushed on the |
| 195 // stack. |
| 196 // |
| 197 // %r9 is initialized with a pointer for rIP-based addressing. This pointer is |
| 198 // used for position-independent access to a scratchpad area for use in tests. |
| 199 // In theory we could use rip-based addressing, but in practice that would |
| 200 // require creating fixups, which would, in turn, require creating a global |
| 201 // context. We therefore rely on the same technique used for pic code in x86-32 |
| 202 // (i.e., IP materialization). Upon a test start up, a call(NextInstruction) is |
| 203 // executed. We then pop the return address from the stack, and use it for pic |
| 204 // addressing. |
| 205 // |
| 206 // The jitted code will look like the following: |
| 207 // |
| 208 // test: |
| 209 // push %r9 |
| 210 // call test$materialize_ip |
| 211 // test$materialize_ip: <<------- %r9 will point here |
| 212 // pop %r9 |
| 213 // push %rax |
| 214 // push %rbx |
| 215 // push %rcx |
| 216 // push %rdx |
| 217 // push %rbp |
| 218 // push %rdi |
| 219 // push %rsi |
| 220 // push %r8 |
| 221 // push %r10 |
| 222 // push %r11 |
| 223 // push %r12 |
| 224 // push %r13 |
| 225 // push %r14 |
| 226 // push %r15 |
| 227 // mov $0, %rax |
| 228 // mov $0, %rbx |
| 229 // mov $0, %rcx |
| 230 // mov $0, %rdx |
| 231 // mov $0, %rbp |
| 232 // mov $0, %rdi |
| 233 // mov $0, %rsi |
| 234 // mov $0, %r8 |
| 235 // mov $0, %r10 |
| 236 // mov $0, %r11 |
| 237 // mov $0, %r12 |
| 238 // mov $0, %r13 |
| 239 // mov $0, %r14 |
| 240 // mov $0, %r15 |
| 241 // |
| 242 // << test code goes here >> |
| 243 // |
| 244 // mov %rax, { 0 + $ScratchpadOffset}(%rbp) |
| 245 // mov %rbx, { 8 + $ScratchpadOffset}(%rbp) |
| 246 // mov %rcx, { 16 + $ScratchpadOffset}(%rbp) |
| 247 // mov %rdx, { 24 + $ScratchpadOffset}(%rbp) |
| 248 // mov %rdi, { 32 + $ScratchpadOffset}(%rbp) |
| 249 // mov %rsi, { 40 + $ScratchpadOffset}(%rbp) |
| 250 // mov %rbp, { 48 + $ScratchpadOffset}(%rbp) |
| 251 // mov %rsp, { 56 + $ScratchpadOffset}(%rbp) |
| 252 // mov %r8, { 64 + $ScratchpadOffset}(%rbp) |
| 253 // mov %r9, { 72 + $ScratchpadOffset}(%rbp) |
| 254 // mov %r10, { 80 + $ScratchpadOffset}(%rbp) |
| 255 // mov %r11, { 88 + $ScratchpadOffset}(%rbp) |
| 256 // mov %r12, { 96 + $ScratchpadOffset}(%rbp) |
| 257 // mov %r13, {104 + $ScratchpadOffset}(%rbp) |
| 258 // mov %r14, {112 + $ScratchpadOffset}(%rbp) |
| 259 // mov %r15, {120 + $ScratchpadOffset}(%rbp) |
| 260 // movups %xmm0, {128 + $ScratchpadOffset}(%rbp) |
| 261 // movups %xmm1, {136 + $ScratchpadOffset}(%rbp) |
| 262 // movups %xmm2, {144 + $ScratchpadOffset}(%rbp) |
| 263 // movups %xmm3, {152 + $ScratchpadOffset}(%rbp) |
| 264 // movups %xmm4, {160 + $ScratchpadOffset}(%rbp) |
| 265 // movups %xmm5, {168 + $ScratchpadOffset}(%rbp) |
| 266 // movups %xmm6, {176 + $ScratchpadOffset}(%rbp) |
| 267 // movups %xmm7, {184 + $ScratchpadOffset}(%rbp) |
| 268 // movups %xmm8, {192 + $ScratchpadOffset}(%rbp) |
| 269 // movups %xmm9, {200 + $ScratchpadOffset}(%rbp) |
| 270 // movups %xmm10, {208 + $ScratchpadOffset}(%rbp) |
| 271 // movups %xmm11, {216 + $ScratchpadOffset}(%rbp) |
| 272 // movups %xmm12, {224 + $ScratchpadOffset}(%rbp) |
| 273 // movups %xmm13, {232 + $ScratchpadOffset}(%rbp) |
| 274 // movups %xmm14, {240 + $ScratchpadOffset}(%rbp) |
| 275 // movups %xmm15, {248 + $ScratchpadOffset}(%rbp) |
| 276 // |
| 277 // pop %r15 |
| 278 // pop %r14 |
| 279 // pop %r13 |
| 280 // pop %r12 |
| 281 // pop %r11 |
| 282 // pop %r10 |
| 283 // pop %r8 |
| 284 // pop %rsi |
| 285 // pop %rdi |
| 286 // pop %rbp |
| 287 // pop %rdx |
| 288 // pop %rcx |
| 289 // pop %rbx |
| 290 // pop %rax |
| 291 // pop %r9 |
| 292 // ret |
| 293 // |
| 294 // << ... >> |
| 295 // |
| 296 // scratchpad: <<------- accessed via $Offset(%ebp) |
| 297 // |
| 298 // << test scratch area >> |
| 299 // |
| 300 // TODO(jpp): test the |
| 301 // |
| 302 // mov %reg, $Offset(%ebp) |
| 303 // movups %xmm, $Offset(%ebp) |
| 304 // |
| 305 // encodings using the low level assembler test ensuring that the register |
| 306 // values can be written to the scratchpad area. |
| 307 // |
// r9 was deliberately chosen so that every instruction accessing memory would
// fail if the rex prefix was not emitted for it.
| 310 class AssemblerX8664Test : public AssemblerX8664TestBase { |
| 311 protected: |
| 312 // Dqword is used to represent 128-bit data types. The Dqword's contents are |
| 313 // the same as the contents read from memory. Tests can then use the union |
| 314 // members to verify the tests' outputs. |
| 315 // |
| 316 // NOTE: We want sizeof(Dqword) == sizeof(uint64_t) * 2. In other words, we |
| 317 // want Dqword's contents to be **exactly** what the memory contents were so |
| 318 // that we can do, e.g., |
| 319 // |
| 320 // ... |
| 321 // float Ret[4]; |
| 322 // // populate Ret |
| 323 // return *reinterpret_cast<Dqword *>(&Ret); |
| 324 // |
// While an ugly hack, this kind of return statement is used extensively in
// the PackedArith (see below) class.
| 327 union Dqword { |
| 328 template <typename T0, typename T1, typename T2, typename T3, |
| 329 typename = typename std::enable_if< |
| 330 std::is_floating_point<T0>::value>::type> |
| 331 Dqword(T0 F0, T1 F1, T2 F2, T3 F3) { |
| 332 F32[0] = F0; |
| 333 F32[1] = F1; |
| 334 F32[2] = F2; |
| 335 F32[3] = F3; |
| 336 } |
| 337 |
| 338 template <typename T> |
| 339 Dqword(typename std::enable_if<std::is_same<T, int32_t>::value, T>::type I0, |
| 340 T I1, T I2, T I3) { |
| 341 I32[0] = I0; |
| 342 I32[1] = I1; |
| 343 I32[2] = I2; |
| 344 I32[3] = I3; |
| 345 } |
| 346 |
| 347 template <typename T> |
| 348 Dqword(typename std::enable_if<std::is_same<T, uint64_t>::value, T>::type |
| 349 U64_0, |
| 350 T U64_1) { |
| 351 U64[0] = U64_0; |
| 352 U64[1] = U64_1; |
| 353 } |
| 354 |
| 355 template <typename T> |
| 356 Dqword(typename std::enable_if<std::is_same<T, double>::value, T>::type D0, |
| 357 T D1) { |
| 358 F64[0] = D0; |
| 359 F64[1] = D1; |
| 360 } |
| 361 |
| 362 bool operator==(const Dqword &Rhs) const { |
| 363 return std::memcmp(this, &Rhs, sizeof(*this)) == 0; |
| 364 } |
| 365 |
| 366 double F64[2]; |
| 367 uint64_t U64[2]; |
| 368 int64_t I64[2]; |
| 369 |
| 370 float F32[4]; |
| 371 uint32_t U32[4]; |
| 372 int32_t I32[4]; |
| 373 |
| 374 uint16_t U16[8]; |
| 375 int16_t I16[8]; |
| 376 |
| 377 uint8_t U8[16]; |
| 378 int8_t I8[16]; |
| 379 |
| 380 private: |
| 381 Dqword() = delete; |
| 382 }; |
| 383 |
| 384 // As stated, we want this condition to hold, so we assert. |
| 385 static_assert(sizeof(Dqword) == 2 * sizeof(uint64_t), |
| 386 "Dqword has the wrong size."); |
| 387 |
  // PackedArith is an interface provider for Dqwords. PackedArith's C argument
  // is the underlying Dqword's type, which is then used so that we can define
  // operators in terms of C++ operators on the underlying elements' type.
| 391 template <typename C> class PackedArith { |
| 392 public: |
| 393 static constexpr uint32_t N = sizeof(Dqword) / sizeof(C); |
| 394 static_assert(N * sizeof(C) == sizeof(Dqword), |
| 395 "Invalid template paramenter."); |
| 396 static_assert((N & 1) == 0, "N should be divisible by 2"); |
| 397 |
| 398 #define DefinePackedComparisonOperator(Op) \ |
| 399 template <typename Container = C, int Size = N> \ |
| 400 typename std::enable_if<std::is_floating_point<Container>::value, \ |
| 401 Dqword>::type \ |
| 402 operator Op(const Dqword &Rhs) const { \ |
| 403 using ElemType = \ |
| 404 typename std::conditional<std::is_same<float, Container>::value, \ |
| 405 int32_t, int64_t>::type; \ |
| 406 static_assert(sizeof(ElemType) == sizeof(Container), \ |
| 407 "Check ElemType definition."); \ |
| 408 const ElemType *const RhsPtr = \ |
| 409 reinterpret_cast<const ElemType *const>(&Rhs); \ |
| 410 const ElemType *const LhsPtr = \ |
| 411 reinterpret_cast<const ElemType *const>(&Lhs); \ |
| 412 ElemType Ret[N]; \ |
| 413 for (uint32_t i = 0; i < N; ++i) { \ |
| 414 Ret[i] = (LhsPtr[i] Op RhsPtr[i]) ? -1 : 0; \ |
| 415 } \ |
| 416 return *reinterpret_cast<Dqword *>(&Ret); \ |
| 417 } |
| 418 |
| 419 DefinePackedComparisonOperator(< ); |
| 420 DefinePackedComparisonOperator(<= ); |
| 421 DefinePackedComparisonOperator(> ); |
| 422 DefinePackedComparisonOperator(>= ); |
| 423 DefinePackedComparisonOperator(== ); |
| 424 DefinePackedComparisonOperator(!= ); |
| 425 |
| 426 #undef DefinePackedComparisonOperator |
| 427 |
| 428 #define DefinePackedOrdUnordComparisonOperator(Op, Ordered) \ |
| 429 template <typename Container = C, int Size = N> \ |
| 430 typename std::enable_if<std::is_floating_point<Container>::value, \ |
| 431 Dqword>::type \ |
| 432 Op(const Dqword &Rhs) const { \ |
| 433 using ElemType = \ |
| 434 typename std::conditional<std::is_same<float, Container>::value, \ |
| 435 int32_t, int64_t>::type; \ |
| 436 static_assert(sizeof(ElemType) == sizeof(Container), \ |
| 437 "Check ElemType definition."); \ |
| 438 const Container *const RhsPtr = \ |
| 439 reinterpret_cast<const Container *const>(&Rhs); \ |
| 440 const Container *const LhsPtr = \ |
| 441 reinterpret_cast<const Container *const>(&Lhs); \ |
| 442 ElemType Ret[N]; \ |
| 443 for (uint32_t i = 0; i < N; ++i) { \ |
| 444 Ret[i] = (!(LhsPtr[i] == LhsPtr[i]) || !(RhsPtr[i] == RhsPtr[i])) != \ |
| 445 (Ordered) \ |
| 446 ? -1 \ |
| 447 : 0; \ |
| 448 } \ |
| 449 return *reinterpret_cast<Dqword *>(&Ret); \ |
| 450 } |
| 451 |
| 452 DefinePackedOrdUnordComparisonOperator(ord, true); |
| 453 DefinePackedOrdUnordComparisonOperator(unord, false); |
| 454 #undef DefinePackedOrdUnordComparisonOperator |
| 455 |
| 456 #define DefinePackedArithOperator(Op, RhsIndexChanges, NeedsInt) \ |
| 457 template <typename Container = C, int Size = N> \ |
| 458 Dqword operator Op(const Dqword &Rhs) const { \ |
| 459 using ElemTypeForFp = typename std::conditional< \ |
| 460 !(NeedsInt), Container, \ |
| 461 typename std::conditional< \ |
| 462 std::is_same<Container, float>::value, uint32_t, \ |
| 463 typename std::conditional<std::is_same<Container, double>::value, \ |
| 464 uint64_t, void>::type>::type>::type; \ |
| 465 using ElemType = \ |
| 466 typename std::conditional<std::is_integral<Container>::value, \ |
| 467 Container, ElemTypeForFp>::type; \ |
| 468 static_assert(!std::is_same<void, ElemType>::value, \ |
| 469 "Check ElemType definition."); \ |
| 470 const ElemType *const RhsPtr = \ |
| 471 reinterpret_cast<const ElemType *const>(&Rhs); \ |
| 472 const ElemType *const LhsPtr = \ |
| 473 reinterpret_cast<const ElemType *const>(&Lhs); \ |
| 474 ElemType Ret[N]; \ |
| 475 for (uint32_t i = 0; i < N; ++i) { \ |
| 476 Ret[i] = LhsPtr[i] Op RhsPtr[(RhsIndexChanges) ? i : 0]; \ |
| 477 } \ |
| 478 return *reinterpret_cast<Dqword *>(&Ret); \ |
| 479 } |
| 480 |
| 481 DefinePackedArithOperator(>>, false, true); |
| 482 DefinePackedArithOperator(<<, false, true); |
| 483 DefinePackedArithOperator(+, true, false); |
| 484 DefinePackedArithOperator(-, true, false); |
| 485 DefinePackedArithOperator(/, true, false); |
| 486 DefinePackedArithOperator(&, true, true); |
| 487 DefinePackedArithOperator(|, true, true); |
| 488 DefinePackedArithOperator (^, true, true); |
| 489 |
| 490 #undef DefinePackedArithOperator |
| 491 |
| 492 #define DefinePackedArithShiftImm(Op) \ |
| 493 template <typename Container = C, int Size = N> \ |
| 494 Dqword operator Op(uint8_t imm) const { \ |
| 495 const Container *const LhsPtr = \ |
| 496 reinterpret_cast<const Container *const>(&Lhs); \ |
| 497 Container Ret[N]; \ |
| 498 for (uint32_t i = 0; i < N; ++i) { \ |
| 499 Ret[i] = LhsPtr[i] Op imm; \ |
| 500 } \ |
| 501 return *reinterpret_cast<Dqword *>(&Ret); \ |
| 502 } |
| 503 |
| 504 DefinePackedArithShiftImm(>> ); |
| 505 DefinePackedArithShiftImm(<< ); |
| 506 |
| 507 #undef DefinePackedArithShiftImm |
| 508 |
| 509 template <typename Container = C, int Size = N> |
| 510 typename std::enable_if<std::is_signed<Container>::value || |
| 511 std::is_floating_point<Container>::value, |
| 512 Dqword>::type |
| 513 operator*(const Dqword &Rhs) const { |
| 514 static_assert((std::is_integral<Container>::value && |
| 515 sizeof(Container) < sizeof(uint64_t)) || |
| 516 std::is_floating_point<Container>::value, |
| 517 "* is only defined for i(8|16|32), and fp types."); |
| 518 |
| 519 const Container *const RhsPtr = |
| 520 reinterpret_cast<const Container *const>(&Rhs); |
| 521 const Container *const LhsPtr = |
| 522 reinterpret_cast<const Container *const>(&Lhs); |
| 523 Container Ret[Size]; |
| 524 for (uint32_t i = 0; i < Size; ++i) { |
| 525 Ret[i] = LhsPtr[i] * RhsPtr[i]; |
| 526 } |
| 527 return *reinterpret_cast<Dqword *>(&Ret); |
| 528 } |
| 529 |
| 530 template <typename Container = C, int Size = N, |
| 531 typename = typename std::enable_if< |
| 532 !std::is_signed<Container>::value>::type> |
| 533 Dqword operator*(const Dqword &Rhs) const { |
| 534 static_assert(std::is_integral<Container>::value && |
| 535 sizeof(Container) < sizeof(uint64_t), |
| 536 "* is only defined for ui(8|16|32)"); |
| 537 using NextType = typename std::conditional< |
| 538 sizeof(Container) == 1, uint16_t, |
| 539 typename std::conditional<sizeof(Container) == 2, uint32_t, |
| 540 uint64_t>::type>::type; |
| 541 static_assert(sizeof(Container) * 2 == sizeof(NextType), |
| 542 "Unexpected size"); |
| 543 |
| 544 const Container *const RhsPtr = |
| 545 reinterpret_cast<const Container *const>(&Rhs); |
| 546 const Container *const LhsPtr = |
| 547 reinterpret_cast<const Container *const>(&Lhs); |
| 548 NextType Ret[Size / 2]; |
| 549 for (uint32_t i = 0; i < Size; i += 2) { |
| 550 Ret[i / 2] = |
| 551 static_cast<NextType>(LhsPtr[i]) * static_cast<NextType>(RhsPtr[i]); |
| 552 } |
| 553 return *reinterpret_cast<Dqword *>(&Ret); |
| 554 } |
| 555 |
| 556 template <typename Container = C, int Size = N> |
| 557 PackedArith<Container> operator~() const { |
| 558 const Container *const LhsPtr = |
| 559 reinterpret_cast<const Container *const>(&Lhs); |
| 560 Container Ret[Size]; |
| 561 for (uint32_t i = 0; i < Size; ++i) { |
| 562 Ret[i] = ~LhsPtr[i]; |
| 563 } |
| 564 return PackedArith<Container>(*reinterpret_cast<Dqword *>(&Ret)); |
| 565 } |
| 566 |
| 567 #define MinMaxOperations(Name, Suffix) \ |
| 568 template <typename Container = C, int Size = N> \ |
| 569 Dqword Name##Suffix(const Dqword &Rhs) const { \ |
| 570 static_assert(std::is_floating_point<Container>::value, \ |
| 571 #Name #Suffix "ps is only available for fp."); \ |
| 572 const Container *const RhsPtr = \ |
| 573 reinterpret_cast<const Container *const>(&Rhs); \ |
| 574 const Container *const LhsPtr = \ |
| 575 reinterpret_cast<const Container *const>(&Lhs); \ |
| 576 Container Ret[Size]; \ |
| 577 for (uint32_t i = 0; i < Size; ++i) { \ |
| 578 Ret[i] = std::Name(LhsPtr[i], RhsPtr[i]); \ |
| 579 } \ |
| 580 return *reinterpret_cast<Dqword *>(&Ret); \ |
| 581 } |
| 582 |
| 583 MinMaxOperations(max, ps); |
| 584 MinMaxOperations(max, pd); |
| 585 MinMaxOperations(min, ps); |
| 586 MinMaxOperations(min, pd); |
| 587 #undef MinMaxOperations |
| 588 |
| 589 template <typename Container = C, int Size = N> |
| 590 Dqword blendWith(const Dqword &Rhs, const Dqword &Mask) const { |
| 591 using MaskType = typename std::conditional< |
| 592 sizeof(Container) == 1, int8_t, |
| 593 typename std::conditional<sizeof(Container) == 2, int16_t, |
| 594 int32_t>::type>::type; |
| 595 static_assert(sizeof(MaskType) == sizeof(Container), |
| 596 "MaskType has the wrong size."); |
| 597 const Container *const RhsPtr = |
| 598 reinterpret_cast<const Container *const>(&Rhs); |
| 599 const Container *const LhsPtr = |
| 600 reinterpret_cast<const Container *const>(&Lhs); |
| 601 const MaskType *const MaskPtr = |
| 602 reinterpret_cast<const MaskType *const>(&Mask); |
| 603 Container Ret[Size]; |
| 604 for (int i = 0; i < Size; ++i) { |
| 605 Ret[i] = ((MaskPtr[i] < 0) ? RhsPtr : LhsPtr)[i]; |
| 606 } |
| 607 return *reinterpret_cast<Dqword *>(&Ret); |
| 608 } |
| 609 |
| 610 private: |
| 611 // The AssemblerX8664Test class needs to be a friend so that it can create |
| 612 // PackedArith objects (see below.) |
| 613 friend class AssemblerX8664Test; |
| 614 |
| 615 explicit PackedArith(const Dqword &MyLhs) : Lhs(MyLhs) {} |
| 616 |
| 617 // Lhs can't be a & because operator~ returns a temporary object that needs |
| 618 // access to its own Dqword. |
| 619 const Dqword Lhs; |
| 620 }; |
| 621 |
| 622 // Named constructor for PackedArith objects. |
| 623 template <typename C> static PackedArith<C> packedAs(const Dqword &D) { |
| 624 return PackedArith<C>(D); |
| 625 } |
| 626 |
| 627 AssemblerX8664Test() { reset(); } |
| 628 |
| 629 void reset() { |
| 630 AssemblerX8664TestBase::reset(); |
| 631 |
| 632 NeedsEpilogue = true; |
| 633 // These dwords are allocated for saving the GPR state after the jitted code |
| 634 // runs. |
| 635 NumAllocatedDwords = AssembledTest::ScratchpadSlots; |
| 636 addPrologue(); |
| 637 } |
| 638 |
| 639 // AssembledTest is a wrapper around a PROT_EXEC mmap'ed buffer. This buffer |
| 640 // contains both the test code as well as prologue/epilogue, and the |
| 641 // scratchpad area that tests may use -- all tests use this scratchpad area |
| 642 // for storing the processor's registers after the tests executed. This class |
| 643 // also exposes helper methods for reading the register state after test |
| 644 // execution, as well as for reading the scratchpad area. |
| 645 class AssembledTest { |
| 646 AssembledTest() = delete; |
| 647 AssembledTest(const AssembledTest &) = delete; |
| 648 AssembledTest &operator=(const AssembledTest &) = delete; |
| 649 |
| 650 public: |
| 651 static constexpr uint32_t MaximumCodeSize = 1 << 20; |
| 652 static constexpr uint32_t raxSlot() { return 0; } |
| 653 static constexpr uint32_t rbxSlot() { return 2; } |
| 654 static constexpr uint32_t rcxSlot() { return 4; } |
| 655 static constexpr uint32_t rdxSlot() { return 6; } |
| 656 static constexpr uint32_t rdiSlot() { return 8; } |
| 657 static constexpr uint32_t rsiSlot() { return 10; } |
| 658 static constexpr uint32_t rbpSlot() { return 12; } |
| 659 static constexpr uint32_t rspSlot() { return 14; } |
| 660 static constexpr uint32_t r8Slot() { return 16; } |
| 661 static constexpr uint32_t r9Slot() { return 18; } |
| 662 static constexpr uint32_t r10Slot() { return 20; } |
| 663 static constexpr uint32_t r11Slot() { return 22; } |
| 664 static constexpr uint32_t r12Slot() { return 24; } |
| 665 static constexpr uint32_t r13Slot() { return 26; } |
| 666 static constexpr uint32_t r14Slot() { return 28; } |
| 667 static constexpr uint32_t r15Slot() { return 30; } |
| 668 |
| 669 // save 4 dwords for each xmm registers. |
| 670 static constexpr uint32_t xmm0Slot() { return 32; } |
| 671 static constexpr uint32_t xmm1Slot() { return 36; } |
| 672 static constexpr uint32_t xmm2Slot() { return 40; } |
| 673 static constexpr uint32_t xmm3Slot() { return 44; } |
| 674 static constexpr uint32_t xmm4Slot() { return 48; } |
| 675 static constexpr uint32_t xmm5Slot() { return 52; } |
| 676 static constexpr uint32_t xmm6Slot() { return 56; } |
| 677 static constexpr uint32_t xmm7Slot() { return 60; } |
| 678 static constexpr uint32_t xmm8Slot() { return 64; } |
| 679 static constexpr uint32_t xmm9Slot() { return 68; } |
| 680 static constexpr uint32_t xmm10Slot() { return 72; } |
| 681 static constexpr uint32_t xmm11Slot() { return 76; } |
| 682 static constexpr uint32_t xmm12Slot() { return 80; } |
| 683 static constexpr uint32_t xmm13Slot() { return 84; } |
| 684 static constexpr uint32_t xmm14Slot() { return 88; } |
| 685 static constexpr uint32_t xmm15Slot() { return 92; } |
| 686 |
| 687 static constexpr uint32_t ScratchpadSlots = 96; |
| 688 |
| 689 AssembledTest(const uint8_t *Data, const size_t MySize, |
| 690 const size_t ExtraStorageDwords) |
| 691 : Size(MaximumCodeSize + 4 * ExtraStorageDwords) { |
| 692 // MaxCodeSize is needed because EXPECT_LT needs a symbol with a name -- |
| 693 // probably a compiler bug? |
| 694 uint32_t MaxCodeSize = MaximumCodeSize; |
| 695 EXPECT_LT(MySize, MaxCodeSize); |
| 696 assert(MySize < MaximumCodeSize); |
| 697 ExecutableData = mmap(nullptr, Size, PROT_WRITE | PROT_READ | PROT_EXEC, |
| 698 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
| 699 EXPECT_NE(MAP_FAILED, ExecutableData) << strerror(errno); |
| 700 assert(MAP_FAILED != ExecutableData); |
| 701 std::memcpy(ExecutableData, Data, MySize); |
| 702 } |
| 703 |
| 704 // We allow AssembledTest to be moved so that we can return objects of |
| 705 // this type. |
| 706 AssembledTest(AssembledTest &&Buffer) |
| 707 : ExecutableData(Buffer.ExecutableData), Size(Buffer.Size) { |
| 708 Buffer.ExecutableData = nullptr; |
| 709 Buffer.Size = 0; |
| 710 } |
| 711 |
| 712 AssembledTest &operator=(AssembledTest &&Buffer) { |
| 713 ExecutableData = Buffer.ExecutableData; |
| 714 Buffer.ExecutableData = nullptr; |
| 715 Size = Buffer.Size; |
| 716 Buffer.Size = 0; |
| 717 return *this; |
| 718 } |
| 719 |
| 720 ~AssembledTest() { |
| 721 if (ExecutableData != nullptr) { |
| 722 munmap(ExecutableData, Size); |
| 723 ExecutableData = nullptr; |
| 724 } |
| 725 } |
| 726 |
| 727 void run() const { reinterpret_cast<void (*)()>(ExecutableData)(); } |
| 728 |
| 729 #define LegacyRegAccessors(NewName, Name64, Name32, Name16, Name8) \ |
| 730 static_assert(Encoded_GPR_##NewName() == Encoded_GPR_##Name64(), \ |
| 731 "Invalid aliasing."); \ |
| 732 uint64_t NewName() const { \ |
| 733 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 734 } \ |
| 735 static_assert(Encoded_GPR_##NewName##q() == Encoded_GPR_##Name64(), \ |
| 736 "Invalid aliasing."); \ |
| 737 uint64_t NewName##q() const { \ |
| 738 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 739 } \ |
| 740 static_assert(Encoded_GPR_##NewName##d() == Encoded_GPR_##Name64(), \ |
| 741 "Invalid aliasing."); \ |
| 742 uint32_t NewName##d() const { \ |
| 743 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 744 } \ |
| 745 static_assert(Encoded_GPR_##NewName##w() == Encoded_GPR_##Name64(), \ |
| 746 "Invalid aliasing."); \ |
| 747 uint16_t NewName##w() const { \ |
| 748 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 749 } \ |
| 750 static_assert(Encoded_GPR_##NewName##l() == Encoded_GPR_##Name64(), \ |
| 751 "Invalid aliasing."); \ |
| 752 uint8_t NewName##l() const { \ |
| 753 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 754 } \ |
| 755 static_assert(Encoded_GPR_##Name64() == Encoded_GPR_##Name64(), \ |
| 756 "Invalid aliasing."); \ |
| 757 uint64_t Name64() const { \ |
| 758 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 759 } \ |
| 760 static_assert(Encoded_GPR_##Name32() == Encoded_GPR_##Name64(), \ |
| 761 "Invalid aliasing."); \ |
| 762 uint32_t Name32() const { \ |
| 763 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 764 } \ |
| 765 static_assert(Encoded_GPR_##Name16() == Encoded_GPR_##Name64(), \ |
| 766 "Invalid aliasing."); \ |
| 767 uint16_t Name16() const { \ |
| 768 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 769 } \ |
| 770 static_assert(Encoded_GPR_##Name8() == Encoded_GPR_##Name64(), \ |
| 771 "Invalid aliasing."); \ |
| 772 uint8_t Name8() const { \ |
| 773 return contentsOfQword(AssembledTest::Name64##Slot()); \ |
| 774 } |
| 775 #define NewRegAccessors(NewName) \ |
| 776 uint64_t NewName() const { \ |
| 777 return contentsOfQword(AssembledTest::NewName##Slot()); \ |
| 778 } \ |
| 779 uint64_t NewName##q() const { \ |
| 780 return contentsOfQword(AssembledTest::NewName##Slot()); \ |
| 781 } \ |
| 782 uint32_t NewName##d() const { \ |
| 783 return contentsOfQword(AssembledTest::NewName##Slot()); \ |
| 784 } \ |
| 785 uint16_t NewName##w() const { \ |
| 786 return contentsOfQword(AssembledTest::NewName##Slot()); \ |
| 787 } \ |
| 788 uint8_t NewName##l() const { \ |
| 789 return contentsOfQword(AssembledTest::NewName##Slot()); \ |
| 790 } |
| 791 #define XmmRegAccessor(Name) \ |
| 792 template <typename T> T Name() const { \ |
| 793 return xmm<T>(AssembledTest::Name##Slot()); \ |
| 794 } |
  // Instantiate the accessors for every register. The legacy GPRs get both
  // their "new" nicknames (r0..r7; note r0 aliases rsp and r1 aliases rax --
  // see the aliasing table at the top of this file) and their traditional
  // names.
  LegacyRegAccessors(r0, rsp, esp, sp, spl);
  LegacyRegAccessors(r1, rax, eax, ax, al);
  LegacyRegAccessors(r2, rbx, ebx, bx, bl);
  LegacyRegAccessors(r3, rcx, ecx, cx, cl);
  LegacyRegAccessors(r4, rdx, edx, dx, dl);
  LegacyRegAccessors(r5, rbp, ebp, bp, bpl);
  LegacyRegAccessors(r6, rsi, esi, si, sil);
  LegacyRegAccessors(r7, rdi, edi, di, dil);
  NewRegAccessors(r8);
  NewRegAccessors(r9);
  NewRegAccessors(r10);
  NewRegAccessors(r11);
  NewRegAccessors(r12);
  NewRegAccessors(r13);
  NewRegAccessors(r14);
  NewRegAccessors(r15);
  XmmRegAccessor(xmm0);
  XmmRegAccessor(xmm1);
  XmmRegAccessor(xmm2);
  XmmRegAccessor(xmm3);
  XmmRegAccessor(xmm4);
  XmmRegAccessor(xmm5);
  XmmRegAccessor(xmm6);
  XmmRegAccessor(xmm7);
  XmmRegAccessor(xmm8);
  XmmRegAccessor(xmm9);
  XmmRegAccessor(xmm10);
  XmmRegAccessor(xmm11);
  XmmRegAccessor(xmm12);
  XmmRegAccessor(xmm13);
  XmmRegAccessor(xmm14);
  XmmRegAccessor(xmm15);
// The helper macros are local to this class; drop them immediately.
#undef XmmRegAccessor
#undef NewRegAccessors
#undef LegacyRegAccessors
| 830 |
| 831 // contentsOfDword is used for reading the values in the scratchpad area. |
| 832 // Valid arguments are the dword ids returned by |
| 833 // AssemblerX8664Test::allocateDword() -- other inputs are considered |
| 834 // invalid, and are not guaranteed to work if the implementation changes. |
| 835 template <typename T = uint32_t, typename = typename std::enable_if< |
| 836 sizeof(T) == sizeof(uint32_t)>::type> |
| 837 T contentsOfDword(uint32_t Dword) const { |
| 838 return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) + |
| 839 dwordOffset(Dword)); |
| 840 } |
| 841 |
| 842 template <typename T = uint64_t, typename = typename std::enable_if< |
| 843 sizeof(T) == sizeof(uint64_t)>::type> |
| 844 T contentsOfQword(uint32_t InitialDword) const { |
| 845 return *reinterpret_cast<T *>(static_cast<uint8_t *>(ExecutableData) + |
| 846 dwordOffset(InitialDword)); |
| 847 } |
| 848 |
| 849 Dqword contentsOfDqword(uint32_t InitialDword) const { |
| 850 return *reinterpret_cast<Dqword *>( |
| 851 static_cast<uint8_t *>(ExecutableData) + |
| 852 dwordOffset(InitialDword)); |
| 853 } |
| 854 |
| 855 template <typename T = uint32_t, typename = typename std::enable_if< |
| 856 sizeof(T) == sizeof(uint32_t)>::type> |
| 857 void setDwordTo(uint32_t Dword, T value) { |
| 858 *reinterpret_cast<uint32_t *>(static_cast<uint8_t *>(ExecutableData) + |
| 859 dwordOffset(Dword)) = |
| 860 *reinterpret_cast<uint32_t *>(&value); |
| 861 } |
| 862 |
| 863 template <typename T = uint64_t, typename = typename std::enable_if< |
| 864 sizeof(T) == sizeof(uint64_t)>::type> |
| 865 void setQwordTo(uint32_t InitialDword, T value) { |
| 866 *reinterpret_cast<uint64_t *>(static_cast<uint8_t *>(ExecutableData) + |
| 867 dwordOffset(InitialDword)) = |
| 868 *reinterpret_cast<uint64_t *>(&value); |
| 869 } |
| 870 |
  // Overwrites the dqword starting at the given dword slot. A dqword spans
  // four consecutive dword slots, so it is written as two qwords: the low
  // half at InitialDword and the high half two dword slots later.
  void setDqwordTo(uint32_t InitialDword, const Dqword &qdword) {
    setQwordTo(InitialDword, qdword.U64[0]);
    setQwordTo(InitialDword + 2, qdword.U64[1]);
  }
| 875 |
private:
  // Reads back the contents of an xmm slot as a full 128-bit Dqword. This
  // overload is selected (via SFINAE) only when T is exactly Dqword.
  template <typename T>
  typename std::enable_if<std::is_same<T, Dqword>::value, Dqword>::type
  xmm(uint8_t Slot) const {
    return contentsOfDqword(Slot);
  }
| 882 |
  // Reads back the contents of an xmm slot as a scalar (float, double, or a
  // same-sized integer type). The branch below is taken on a compile-time
  // constant, but both branches must still compile for every T; the
  // std::conditional aliases therefore substitute a type for which the call
  // in the untaken branch is well-formed (this idiom predates if constexpr).
  template <typename T>
  typename std::enable_if<!std::is_same<T, Dqword>::value, T>::type
  xmm(uint8_t Slot) const {
    constexpr bool TIs64Bit = sizeof(T) == sizeof(uint64_t);
    using _64BitType = typename std::conditional<TIs64Bit, T, uint64_t>::type;
    using _32BitType = typename std::conditional<TIs64Bit, uint32_t, T>::type;
    if (TIs64Bit) {
      return contentsOfQword<_64BitType>(Slot);
    }
    return contentsOfDword<_32BitType>(Slot);
  }
| 894 |
| 895 static uint32_t dwordOffset(uint32_t Index) { |
| 896 return MaximumCodeSize + (Index * 4); |
| 897 } |
| 898 |
| 899 void *ExecutableData = nullptr; |
| 900 size_t Size; |
| 901 }; |
| 902 |
| 903 // assemble created an AssembledTest with the jitted code. The first time |
| 904 // assemble is executed it will add the epilogue to the jitted code (which is |
| 905 // the reason why this method is not const qualified. |
| 906 AssembledTest assemble() { |
| 907 if (NeedsEpilogue) { |
| 908 addEpilogue(); |
| 909 } |
| 910 |
| 911 NeedsEpilogue = false; |
| 912 return AssembledTest(codeBytes(), codeBytesSize(), NumAllocatedDwords); |
| 913 } |
| 914 |
| 915 // Allocates a new dword slot in the test's scratchpad area. |
| 916 uint32_t allocateDword() { return NumAllocatedDwords++; } |
| 917 |
| 918 // Allocates a new qword slot in the test's scratchpad area. |
| 919 uint32_t allocateQword() { |
| 920 uint32_t InitialDword = allocateDword(); |
| 921 allocateDword(); |
| 922 return InitialDword; |
| 923 } |
| 924 |
| 925 // Allocates a new dqword slot in the test's scratchpad area. |
| 926 uint32_t allocateDqword() { |
| 927 uint32_t InitialDword = allocateQword(); |
| 928 allocateQword(); |
| 929 return InitialDword; |
| 930 } |
| 931 |
| 932 Address dwordAddress(uint32_t Dword) { |
| 933 return Address(Encoded_GPR_r9(), dwordDisp(Dword)); |
| 934 } |
| 935 |
private:
  // The r??/xmm??SlotAddress methods return an
  // AssemblerX8664::Traits::Address that can be used by the test cases to
  // encode an address operand for accessing the slot for the specified
  // register. These are all private because, when jitting the test code,
  // tests should not tamper with these values. Besides, during the test
  // execution these slots' contents are undefined and should not be
  // accessed.
  Address raxSlotAddress() { return dwordAddress(AssembledTest::raxSlot()); }
  Address rbxSlotAddress() { return dwordAddress(AssembledTest::rbxSlot()); }
  Address rcxSlotAddress() { return dwordAddress(AssembledTest::rcxSlot()); }
  Address rdxSlotAddress() { return dwordAddress(AssembledTest::rdxSlot()); }
  Address rdiSlotAddress() { return dwordAddress(AssembledTest::rdiSlot()); }
  Address rsiSlotAddress() { return dwordAddress(AssembledTest::rsiSlot()); }
  Address rbpSlotAddress() { return dwordAddress(AssembledTest::rbpSlot()); }
  Address rspSlotAddress() { return dwordAddress(AssembledTest::rspSlot()); }
  Address r8SlotAddress() { return dwordAddress(AssembledTest::r8Slot()); }
  Address r9SlotAddress() { return dwordAddress(AssembledTest::r9Slot()); }
  Address r10SlotAddress() { return dwordAddress(AssembledTest::r10Slot()); }
  Address r11SlotAddress() { return dwordAddress(AssembledTest::r11Slot()); }
  Address r12SlotAddress() { return dwordAddress(AssembledTest::r12Slot()); }
  Address r13SlotAddress() { return dwordAddress(AssembledTest::r13Slot()); }
  Address r14SlotAddress() { return dwordAddress(AssembledTest::r14Slot()); }
  Address r15SlotAddress() { return dwordAddress(AssembledTest::r15Slot()); }
  Address xmm0SlotAddress() { return dwordAddress(AssembledTest::xmm0Slot()); }
  Address xmm1SlotAddress() { return dwordAddress(AssembledTest::xmm1Slot()); }
  Address xmm2SlotAddress() { return dwordAddress(AssembledTest::xmm2Slot()); }
  Address xmm3SlotAddress() { return dwordAddress(AssembledTest::xmm3Slot()); }
  Address xmm4SlotAddress() { return dwordAddress(AssembledTest::xmm4Slot()); }
  Address xmm5SlotAddress() { return dwordAddress(AssembledTest::xmm5Slot()); }
  Address xmm6SlotAddress() { return dwordAddress(AssembledTest::xmm6Slot()); }
  Address xmm7SlotAddress() { return dwordAddress(AssembledTest::xmm7Slot()); }
  Address xmm8SlotAddress() { return dwordAddress(AssembledTest::xmm8Slot()); }
  Address xmm9SlotAddress() { return dwordAddress(AssembledTest::xmm9Slot()); }
  Address xmm10SlotAddress() {
    return dwordAddress(AssembledTest::xmm10Slot());
  }
  Address xmm11SlotAddress() {
    return dwordAddress(AssembledTest::xmm11Slot());
  }
  Address xmm12SlotAddress() {
    return dwordAddress(AssembledTest::xmm12Slot());
  }
  Address xmm13SlotAddress() {
    return dwordAddress(AssembledTest::xmm13Slot());
  }
  Address xmm14SlotAddress() {
    return dwordAddress(AssembledTest::xmm14Slot());
  }
  Address xmm15SlotAddress() {
    return dwordAddress(AssembledTest::xmm15Slot());
  }
| 986 |
  // Returns the displacement that should be used when accessing the specified
  // Dword in the scratchpad area. Slots are addressed relative to r9, which
  // the prologue loads with the address of its own popl instruction -- i.e.,
  // the buffer start plus the encoded lengths of "push r9" (2 bytes, REX
  // prefix included) and "call rel32" (5 bytes). The scratchpad itself starts
  // MaximumCodeSize bytes into the buffer, so the displacement is the slot's
  // buffer offset minus those 7 bytes.
  uint32_t dwordDisp(uint32_t Dword) const {
    EXPECT_LT(Dword, NumAllocatedDwords);
    assert(Dword < NumAllocatedDwords);
    static constexpr uint8_t PushR9Bytes = 2;
    static constexpr uint8_t CallImmBytes = 5;
    return AssembledTest::MaximumCodeSize + (Dword * 4) -
           (PushR9Bytes + CallImmBytes);
  }
| 999 |
  // Emits the test prologue:
  //   1) materialize the IP in r9 via the push/call/pop trick, so the
  //      scratchpad can be addressed relative to r9 (see dwordDisp());
  //   2) save every GPR the test may clobber (r9 was already pushed; rsp is
  //      deliberately excluded);
  //   3) zero those registers so every test starts from a known state.
  void addPrologue() {
    // NOTE(review): call's Immediate(4) is assumed to land on the popl right
    // after it -- confirm against the assembler's call(Immediate) encoding.
    __ pushl(Encoded_GPR_r9());
    __ call(Immediate(4));
    __ popl(Encoded_GPR_r9());

    // Save GPR state; addEpilogue() pops these in exact reverse order.
    __ pushl(Encoded_GPR_rax());
    __ pushl(Encoded_GPR_rbx());
    __ pushl(Encoded_GPR_rcx());
    __ pushl(Encoded_GPR_rdx());
    __ pushl(Encoded_GPR_rbp());
    __ pushl(Encoded_GPR_rdi());
    __ pushl(Encoded_GPR_rsi());
    __ pushl(Encoded_GPR_r8());
    __ pushl(Encoded_GPR_r10());
    __ pushl(Encoded_GPR_r11());
    __ pushl(Encoded_GPR_r12());
    __ pushl(Encoded_GPR_r13());
    __ pushl(Encoded_GPR_r14());
    __ pushl(Encoded_GPR_r15());

    // Zero the same registers (r9 excluded: it holds the IP anchor).
    __ mov(IceType_i32, Encoded_GPR_rax(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rbx(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rcx(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rdx(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rbp(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rdi(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_rsi(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r8(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r10(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r11(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r12(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r13(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r14(), Immediate(0x00));
    __ mov(IceType_i32, Encoded_GPR_r15(), Immediate(0x00));
  }
| 1035 |
  // Emits the test epilogue: spill every register to its scratchpad slot so
  // the harness can inspect the final machine state, restore the GPRs saved
  // by addPrologue() (popped in exact reverse push order), and return.
  void addEpilogue() {
    // Capture the final GPR values in the scratchpad.
    __ mov(IceType_i64, raxSlotAddress(), Encoded_GPR_rax());
    __ mov(IceType_i64, rbxSlotAddress(), Encoded_GPR_rbx());
    __ mov(IceType_i64, rcxSlotAddress(), Encoded_GPR_rcx());
    __ mov(IceType_i64, rdxSlotAddress(), Encoded_GPR_rdx());
    __ mov(IceType_i64, rdiSlotAddress(), Encoded_GPR_rdi());
    __ mov(IceType_i64, rsiSlotAddress(), Encoded_GPR_rsi());
    __ mov(IceType_i64, rbpSlotAddress(), Encoded_GPR_rbp());
    __ mov(IceType_i64, rspSlotAddress(), Encoded_GPR_rsp());
    __ mov(IceType_i64, r8SlotAddress(), Encoded_GPR_r8());
    __ mov(IceType_i64, r9SlotAddress(), Encoded_GPR_r9());
    __ mov(IceType_i64, r10SlotAddress(), Encoded_GPR_r10());
    __ mov(IceType_i64, r11SlotAddress(), Encoded_GPR_r11());
    __ mov(IceType_i64, r12SlotAddress(), Encoded_GPR_r12());
    __ mov(IceType_i64, r13SlotAddress(), Encoded_GPR_r13());
    __ mov(IceType_i64, r14SlotAddress(), Encoded_GPR_r14());
    __ mov(IceType_i64, r15SlotAddress(), Encoded_GPR_r15());
    // Capture the xmm values. movups (unaligned store) is used since dqword
    // slots are not guaranteed to be 16-byte aligned (see allocateDqword()).
    __ movups(xmm0SlotAddress(), Encoded_Xmm_xmm0());
    __ movups(xmm1SlotAddress(), Encoded_Xmm_xmm1());
    __ movups(xmm2SlotAddress(), Encoded_Xmm_xmm2());
    __ movups(xmm3SlotAddress(), Encoded_Xmm_xmm3());
    __ movups(xmm4SlotAddress(), Encoded_Xmm_xmm4());
    __ movups(xmm5SlotAddress(), Encoded_Xmm_xmm5());
    __ movups(xmm6SlotAddress(), Encoded_Xmm_xmm6());
    __ movups(xmm7SlotAddress(), Encoded_Xmm_xmm7());
    __ movups(xmm8SlotAddress(), Encoded_Xmm_xmm8());
    __ movups(xmm9SlotAddress(), Encoded_Xmm_xmm9());
    __ movups(xmm10SlotAddress(), Encoded_Xmm_xmm10());
    __ movups(xmm11SlotAddress(), Encoded_Xmm_xmm11());
    __ movups(xmm12SlotAddress(), Encoded_Xmm_xmm12());
    __ movups(xmm13SlotAddress(), Encoded_Xmm_xmm13());
    __ movups(xmm14SlotAddress(), Encoded_Xmm_xmm14());
    __ movups(xmm15SlotAddress(), Encoded_Xmm_xmm15());

    // Restore the saved GPRs -- order must mirror addPrologue()'s pushes.
    __ popl(Encoded_GPR_r15());
    __ popl(Encoded_GPR_r14());
    __ popl(Encoded_GPR_r13());
    __ popl(Encoded_GPR_r12());
    __ popl(Encoded_GPR_r11());
    __ popl(Encoded_GPR_r10());
    __ popl(Encoded_GPR_r8());
    __ popl(Encoded_GPR_rsi());
    __ popl(Encoded_GPR_rdi());
    __ popl(Encoded_GPR_rbp());
    __ popl(Encoded_GPR_rdx());
    __ popl(Encoded_GPR_rcx());
    __ popl(Encoded_GPR_rbx());
    __ popl(Encoded_GPR_rax());
    __ popl(Encoded_GPR_r9());

    __ ret();
  }
| 1088 |
  // Whether assemble() still needs to append the epilogue; cleared by the
  // first assemble() call. NOTE(review): presumably initialized to true by
  // the test's setup code alongside addPrologue() -- the initialization is
  // outside this view, confirm.
  bool NeedsEpilogue;
  // Number of dword slots handed out so far by allocateDword().
  uint32_t NumAllocatedDwords;
};
| 1092 |
| 1093 } // end of namespace Test |
| 1094 } // end of namespace X8664 |
| 1095 } // end of namespace Ice |
| 1096 |
| 1097 #endif // ASSEMBLERX8664_TESTUTIL_H_ |
| OLD | NEW |