| OLD | NEW |
| (Empty) | |
| 1 //===- subzero/unittest/AssemblerX8664/XmmArith.cpp -----------------------===// |
| 2 // |
| 3 // The Subzero Code Generator |
| 4 // |
| 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. |
| 7 // |
| 8 //===----------------------------------------------------------------------===// |
| 9 #include "AssemblerX8664/TestUtil.h" |
| 10 |
| 11 namespace Ice { |
| 12 namespace X8664 { |
| 13 namespace Test { |
| 14 namespace { |
| 15 |
| 16 TEST_F(AssemblerX8664Test, ArithSS) { /* Assembles and runs addss, subss, mulss and divss for 32- and 64-bit floats, in xmm,xmm and xmm,memory forms, rotating through xmm0-xmm15. */ |
| 17 #define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op) \ |
| 18 do { \ |
| 19 static_assert(FloatSize == 32 || FloatSize == 64, \ |
| 20 "Invalid fp size " #FloatSize); \ |
| 21 static constexpr char TestString[] = \ |
| 22 "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1 \ |
| 23 ", " #Inst ", " #Op ")"; \ |
| 24 static constexpr bool IsDouble = FloatSize == 64; \ |
| 25 using Type = std::conditional<IsDouble, double, float>::type; \ |
| 26 const uint32_t T0 = allocateQword(); \ |
| 27 const Type V0 = Value0; \ |
| 28 const uint32_t T1 = allocateQword(); \ |
| 29 const Type V1 = Value1; \ |
| 30 /* Emit: load Dst from slot T0, load Src from slot T1, then Inst Dst, Src. */ \ |
| 31 __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 32 __ movss(IceType_f##FloatSize, Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 33 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 34 /* The slots are filled after assemble() and before run(); the width written follows IsDouble. */ \ |
| 35 AssembledTest test = assemble(); \ |
| 36 if (IsDouble) { \ |
| 37 test.setQwordTo(T0, static_cast<double>(V0)); \ |
| 38 test.setQwordTo(T1, static_cast<double>(V1)); \ |
| 39 } else { \ |
| 40 test.setDwordTo(T0, static_cast<float>(V0)); \ |
| 41 test.setDwordTo(T1, static_cast<float>(V1)); \ |
| 42 } \ |
| 43 \ |
| 44 test.run(); \ |
| 45 /* The expected value is computed on the host as V0 Op V1 and compared with the Dst register. */ \ |
| 46 ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \ |
| 47 reset(); \ |
| 48 } while (0) |
| 49 /* Same check as TestArithSSXmmXmm, except the second operand is read directly from memory slot T1 instead of a Src register. */ |
| 50 #define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op) \ |
| 51 do { \ |
| 52 static_assert(FloatSize == 32 || FloatSize == 64, \ |
| 53 "Invalid fp size " #FloatSize); \ |
| 54 static constexpr char TestString[] = \ |
| 55 "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst \ |
| 56 ", " #Op ")"; \ |
| 57 static constexpr bool IsDouble = FloatSize == 64; \ |
| 58 using Type = std::conditional<IsDouble, double, float>::type; \ |
| 59 const uint32_t T0 = allocateQword(); \ |
| 60 const Type V0 = Value0; \ |
| 61 const uint32_t T1 = allocateQword(); \ |
| 62 const Type V1 = Value1; \ |
| 63 /* Emit: load Dst from slot T0, then Inst Dst, [T1]. */ \ |
| 64 __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 65 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 66 \ |
| 67 AssembledTest test = assemble(); \ |
| 68 if (IsDouble) { \ |
| 69 test.setQwordTo(T0, static_cast<double>(V0)); \ |
| 70 test.setQwordTo(T1, static_cast<double>(V1)); \ |
| 71 } else { \ |
| 72 test.setDwordTo(T0, static_cast<float>(V0)); \ |
| 73 test.setDwordTo(T1, static_cast<float>(V1)); \ |
| 74 } \ |
| 75 \ |
| 76 test.run(); \ |
| 77 \ |
| 78 ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \ |
| 79 reset(); \ |
| 80 } while (0) |
| 81 /* Runs the four scalar operations for one register triple: Dst0 is used by the xmm,xmm form and Dst1 by the xmm,memory form. */ |
| 82 #define TestArithSS(FloatSize, Src, Dst0, Dst1) \ |
| 83 do { \ |
| 84 TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +); \ |
| 85 TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +); \ |
| 86 TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -); \ |
| 87 TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -); \ |
| 88 TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *); \ |
| 89 TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *); \ |
| 90 TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / ); \ |
| 91 TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / ); \ |
| 92 } while (0) |
| 93 /* Repeats TestArithSS for both supported float widths. */ |
| 94 #define TestImpl(Src, Dst0, Dst1) \ |
| 95 do { \ |
| 96 TestArithSS(32, Src, Dst0, Dst1); \ |
| 97 TestArithSS(64, Src, Dst0, Dst1); \ |
| 98 } while (0) |
| 99 /* Arguments are (Src, Dst0, Dst1); the triples rotate so each of xmm0-xmm15 appears once in every role. */ |
| 100 TestImpl(xmm0, xmm1, xmm2); |
| 101 TestImpl(xmm1, xmm2, xmm3); |
| 102 TestImpl(xmm2, xmm3, xmm4); |
| 103 TestImpl(xmm3, xmm4, xmm5); |
| 104 TestImpl(xmm4, xmm5, xmm6); |
| 105 TestImpl(xmm5, xmm6, xmm7); |
| 106 TestImpl(xmm6, xmm7, xmm8); |
| 107 TestImpl(xmm7, xmm8, xmm9); |
| 108 TestImpl(xmm8, xmm9, xmm10); |
| 109 TestImpl(xmm9, xmm10, xmm11); |
| 110 TestImpl(xmm10, xmm11, xmm12); |
| 111 TestImpl(xmm11, xmm12, xmm13); |
| 112 TestImpl(xmm12, xmm13, xmm14); |
| 113 TestImpl(xmm13, xmm14, xmm15); |
| 114 TestImpl(xmm14, xmm15, xmm0); |
| 115 TestImpl(xmm15, xmm0, xmm1); |
| 116 /* Remove the helper macro definitions introduced by this test. */ |
| 117 #undef TestImpl |
| 118 #undef TestArithSS |
| 119 #undef TestArithSSXmmAddr |
| 120 #undef TestArithSSXmmXmm |
| 121 } |
| 122 |
| 123 TEST_F(AssemblerX8664Test, PArith) { /* Checks packed integer instructions (psra, psrl, psll, pmull, pmuludq, padd, psub, pand, pandn, por, pxor) for 8-, 16- and 32-bit lanes; see TestPArithSize for which ones run at each size. */ |
| 124 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size) \ |
| 125 do { \ |
| 126 static constexpr char TestString[] = \ |
| 127 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ |
| 128 ", " #Type ", " #Size ")"; \ |
| 129 const uint32_t T0 = allocateDqword(); \ |
| 130 const Dqword V0 Value0; \ |
| 131 /* Value0 and Value1 are parenthesized argument lists, so these lines expand to Dqword constructor calls. */ \ |
| 132 const uint32_t T1 = allocateDqword(); \ |
| 133 const Dqword V1 Value1; \ |
| 134 /* Emit: load Dst from slot T0 and Src from slot T1, then Inst Dst, Src with element type IceType_i<Size>. */ \ |
| 135 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 136 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 137 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 138 \ |
| 139 AssembledTest test = assemble(); \ |
| 140 test.setDqwordTo(T0, V0); \ |
| 141 test.setDqwordTo(T1, V1); \ |
| 142 test.run(); \ |
| 143 /* The expected value is V0 viewed as lanes of <Type><Size>_t, combined with V1 through Op. */ \ |
| 144 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \ |
| 145 << TestString; \ |
| 146 reset(); \ |
| 147 } while (0) |
| 148 /* Same check as TestPArithXmmXmm, except the second operand is read from memory slot T1 instead of a Src register. */ |
| 149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size) \ |
| 150 do { \ |
| 151 static constexpr char TestString[] = \ |
| 152 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ |
| 153 ", " #Type ", " #Size ")"; \ |
| 154 const uint32_t T0 = allocateDqword(); \ |
| 155 const Dqword V0 Value0; \ |
| 156 \ |
| 157 const uint32_t T1 = allocateDqword(); \ |
| 158 const Dqword V1 Value1; \ |
| 159 \ |
| 160 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 161 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 162 \ |
| 163 AssembledTest test = assemble(); \ |
| 164 test.setDqwordTo(T0, V0); \ |
| 165 test.setDqwordTo(T1, V1); \ |
| 166 test.run(); \ |
| 167 \ |
| 168 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \ |
| 169 << TestString; \ |
| 170 reset(); \ |
| 171 } while (0) |
| 172 /* Immediate-operand variant: the second operand is Immediate(Imm), and the expected value applies Op with Imm directly. */ |
| 173 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size) \ |
| 174 do { \ |
| 175 static constexpr char TestString[] = \ |
| 176 "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type \ |
| 177 ", " #Size ")"; \ |
| 178 const uint32_t T0 = allocateDqword(); \ |
| 179 const Dqword V0 Value0; \ |
| 180 \ |
| 181 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 182 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Immediate(Imm)); \ |
| 183 \ |
| 184 AssembledTest test = assemble(); \ |
| 185 test.setDqwordTo(T0, V0); \ |
| 186 test.run(); \ |
| 187 \ |
| 188 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>()) \ |
| 189 << TestString; \ |
| 190 reset(); \ |
| 191 } while (0) |
| 192 /* pandn has its own macros because the expected value is (~V0) & V1 rather than a single binary operator. */ |
| 193 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size) \ |
| 194 do { \ |
| 195 static constexpr char TestString[] = \ |
| 196 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type \ |
| 197 ", " #Size ")"; \ |
| 198 const uint32_t T0 = allocateDqword(); \ |
| 199 const Dqword V0 Value0; \ |
| 200 \ |
| 201 const uint32_t T1 = allocateDqword(); \ |
| 202 const Dqword V1 Value1; \ |
| 203 \ |
| 204 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 205 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 206 __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 207 \ |
| 208 AssembledTest test = assemble(); \ |
| 209 test.setDqwordTo(T0, V0); \ |
| 210 test.setDqwordTo(T1, V1); \ |
| 211 test.run(); \ |
| 212 \ |
| 213 ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \ |
| 214 << TestString; \ |
| 215 reset(); \ |
| 216 } while (0) |
| 217 /* Memory-operand variant of TestPAndnXmmXmm: the second operand is read from slot T1. */ |
| 218 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size) \ |
| 219 do { \ |
| 220 static constexpr char TestString[] = \ |
| 221 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size \ |
| 222 ")"; \ |
| 223 const uint32_t T0 = allocateDqword(); \ |
| 224 const Dqword V0 Value0; \ |
| 225 \ |
| 226 const uint32_t T1 = allocateDqword(); \ |
| 227 const Dqword V1 Value1; \ |
| 228 \ |
| 229 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 230 __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 231 \ |
| 232 AssembledTest test = assemble(); \ |
| 233 test.setDqwordTo(T0, V0); \ |
| 234 test.setDqwordTo(T1, V1); \ |
| 235 test.run(); \ |
| 236 \ |
| 237 ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \ |
| 238 << TestString; \ |
| 239 reset(); \ |
| 240 } while (0) |
| 241 /* Runs every packed operation for one lane width. The shifts and pmull are skipped when Size is 8; pmuludq additionally requires Size != 16, so it only runs for Size 32. padd, psub, pand, pandn, por and pxor run for every size. */ |
| 242 #define TestPArithSize(Dst, Src, Size) \ |
| 243 do { \ |
| 244 static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size."); \ |
| 245 if (Size != 8) { \ |
| 246 TestPArithXmmXmm( \ |
| 247 Dst, \ |
| 248 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 249 Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \ |
| 250 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ |
| 251 uint64_t(0x8080404002020101ull)), \ |
| 252 (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \ |
| 253 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 254 uint64_t(0x8080404002020101ull)), \ |
| 255 3u, psra, >>, int, Size); \ |
| 256 TestPArithXmmXmm( \ |
| 257 Dst, \ |
| 258 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 259 Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \ |
| 260 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ |
| 261 uint64_t(0x8080404002020101ull)), \ |
| 262 (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \ |
| 263 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 264 uint64_t(0x8080404002020101ull)), \ |
| 265 3u, psrl, >>, uint, Size); \ |
| 266 TestPArithXmmXmm( \ |
| 267 Dst, \ |
| 268 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 269 Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \ |
| 270 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \ |
| 271 uint64_t(0x8080404002020101ull)), \ |
| 272 (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \ |
| 273 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 274 uint64_t(0x8080404002020101ull)), \ |
| 275 3u, psll, <<, uint, Size); \ |
| 276 \ |
| 277 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 278 uint64_t(0x8080404002020101ull)), \ |
| 279 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 280 uint64_t(0x0123456789ABCDEull)), \ |
| 281 pmull, *, int, Size); \ |
| 282 TestPArithXmmAddr( \ |
| 283 Dst, \ |
| 284 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 285 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 286 pmull, *, int, Size); \ |
| 287 if (Size != 16) { \ |
| 288 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 289 uint64_t(0x8080404002020101ull)), \ |
| 290 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 291 uint64_t(0x0123456789ABCDEull)), \ |
| 292 pmuludq, *, uint, Size); \ |
| 293 TestPArithXmmAddr( \ |
| 294 Dst, (uint64_t(0x8040201008040201ull), \ |
| 295 uint64_t(0x8080404002020101ull)), \ |
| 296 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 297 pmuludq, *, uint, Size); \ |
| 298 } \ |
| 299 } \ |
| 300 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 301 uint64_t(0x8080404002020101ull)), \ |
| 302 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 303 uint64_t(0x0123456789ABCDEull)), \ |
| 304 padd, +, int, Size); \ |
| 305 TestPArithXmmAddr( \ |
| 306 Dst, \ |
| 307 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 308 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 309 padd, +, int, Size); \ |
| 310 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 311 uint64_t(0x8080404002020101ull)), \ |
| 312 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 313 uint64_t(0x0123456789ABCDEull)), \ |
| 314 psub, -, int, Size); \ |
| 315 TestPArithXmmAddr( \ |
| 316 Dst, \ |
| 317 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 318 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 319 psub, -, int, Size); \ |
| 320 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 321 uint64_t(0x8080404002020101ull)), \ |
| 322 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 323 uint64_t(0x0123456789ABCDEull)), \ |
| 324 pand, &, int, Size); \ |
| 325 TestPArithXmmAddr( \ |
| 326 Dst, \ |
| 327 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 328 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 329 pand, &, int, Size); \ |
| 330 \ |
| 331 TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 332 uint64_t(0x8080404002020101ull)), \ |
| 333 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 334 uint64_t(0x0123456789ABCDEull)), \ |
| 335 int, Size); \ |
| 336 TestPAndnXmmAddr( \ |
| 337 Dst, \ |
| 338 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 339 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 340 int, Size); \ |
| 341 \ |
| 342 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 343 uint64_t(0x8080404002020101ull)), \ |
| 344 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 345 uint64_t(0x0123456789ABCDEull)), \ |
| 346 por, |, int, Size); \ |
| 347 TestPArithXmmAddr( \ |
| 348 Dst, \ |
| 349 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 350 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 351 por, |, int, Size); \ |
| 352 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \ |
| 353 uint64_t(0x8080404002020101ull)), \ |
| 354 Src, (uint64_t(0xFFFFFFFF00000000ull), \ |
| 355 uint64_t(0x0123456789ABCDEull)), \ |
| 356 pxor, ^, int, Size); \ |
| 357 TestPArithXmmAddr( \ |
| 358 Dst, \ |
| 359 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \ |
| 360 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \ |
| 361 pxor, ^, int, Size); \ |
| 362 } while (0) |
| 363 /* NOTE: the parameters here are named (Src, Dst) but are forwarded positionally to TestPArithSize(Dst, Src, Size), so the first argument ends up as the destination register and the second as the source. */ |
| 364 #define TestPArith(Src, Dst) \ |
| 365 do { \ |
| 366 TestPArithSize(Src, Dst, 8); \ |
| 367 TestPArithSize(Src, Dst, 16); \ |
| 368 TestPArithSize(Src, Dst, 32); \ |
| 369 } while (0) |
| 370 /* Adjacent register pairs, wrapping from xmm15 back to xmm0. */ |
| 371 TestPArith(xmm0, xmm1); |
| 372 TestPArith(xmm1, xmm2); |
| 373 TestPArith(xmm2, xmm3); |
| 374 TestPArith(xmm3, xmm4); |
| 375 TestPArith(xmm4, xmm5); |
| 376 TestPArith(xmm5, xmm6); |
| 377 TestPArith(xmm6, xmm7); |
| 378 TestPArith(xmm7, xmm8); |
| 379 TestPArith(xmm8, xmm9); |
| 380 TestPArith(xmm9, xmm10); |
| 381 TestPArith(xmm10, xmm11); |
| 382 TestPArith(xmm11, xmm12); |
| 383 TestPArith(xmm12, xmm13); |
| 384 TestPArith(xmm13, xmm14); |
| 385 TestPArith(xmm14, xmm15); |
| 386 TestPArith(xmm15, xmm0); |
| 387 /* Remove the helper macro definitions introduced by this test. */ |
| 388 #undef TestPArith |
| 389 #undef TestPArithSize |
| 390 #undef TestPAndnXmmAddr |
| 391 #undef TestPAndnXmmXmm |
| 392 #undef TestPArithXmmImm |
| 393 #undef TestPArithXmmAddr |
| 394 #undef TestPArithXmmXmm |
| 395 } |
| 396 |
| 397 TEST_F(AssemblerX8664Test, ArithPS) { /* Checks packed floating-point arithmetic (addps, subps, mulps, divps), bitwise and/or/xor in ps and pd forms, and min/max in ps and pd forms, over adjacent xmm register pairs. */ |
| 398 #define TestArithPSXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type) \ |
| 399 do { \ |
| 400 static constexpr char TestString[] = \ |
| 401 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ |
| 402 ", " #Type ")"; \ |
| 403 const uint32_t T0 = allocateDqword(); \ |
| 404 const Dqword V0 Value0; \ |
| 405 const uint32_t T1 = allocateDqword(); \ |
| 406 const Dqword V1 Value1; \ |
| 407 /* Emit: load Dst from slot T0 and Src from slot T1, then Inst(IceType_f32, Dst, Src). */ \ |
| 408 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 409 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 410 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 411 \ |
| 412 AssembledTest test = assemble(); \ |
| 413 test.setDqwordTo(T0, V0); \ |
| 414 test.setDqwordTo(T1, V1); \ |
| 415 test.run(); \ |
| 416 /* The expected value is V0 viewed as lanes of Type, combined with V1 through Op. */ \ |
| 417 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ |
| 418 \ |
| 419 reset(); \ |
| 420 } while (0) |
| 421 /* Register-register variant for instructions whose emitter takes no type argument: Inst(Dst, Src). */ |
| 422 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type) \ |
| 423 do { \ |
| 424 static constexpr char TestString[] = \ |
| 425 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \ |
| 426 ", " #Type ")"; \ |
| 427 const uint32_t T0 = allocateDqword(); \ |
| 428 const Dqword V0 Value0; \ |
| 429 const uint32_t T1 = allocateDqword(); \ |
| 430 const Dqword V1 Value1; \ |
| 431 \ |
| 432 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 433 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 434 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 435 \ |
| 436 AssembledTest test = assemble(); \ |
| 437 test.setDqwordTo(T0, V0); \ |
| 438 test.setDqwordTo(T1, V1); \ |
| 439 test.run(); \ |
| 440 \ |
| 441 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ |
| 442 \ |
| 443 reset(); \ |
| 444 } while (0) |
| 445 /* Memory-operand variant of the untyped form: Inst(Dst, address of slot T1). */ |
| 446 #define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type) \ |
| 447 do { \ |
| 448 static constexpr char TestString[] = \ |
| 449 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ |
| 450 ", " #Type ")"; \ |
| 451 const uint32_t T0 = allocateDqword(); \ |
| 452 const Dqword V0 Value0; \ |
| 453 const uint32_t T1 = allocateDqword(); \ |
| 454 const Dqword V1 Value1; \ |
| 455 \ |
| 456 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 457 __ Inst(Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 458 \ |
| 459 AssembledTest test = assemble(); \ |
| 460 test.setDqwordTo(T0, V0); \ |
| 461 test.setDqwordTo(T1, V1); \ |
| 462 test.run(); \ |
| 463 \ |
| 464 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ |
| 465 \ |
| 466 reset(); \ |
| 467 } while (0) |
| 468 /* Min/max check: the expected value comes from calling a method named after the instruction on the packed view, i.e. packedAs<Type>(V0).Inst(V1). */ |
| 469 #define TestMinMaxPS(Dst, Value0, Src, Value1, Inst, Type) \ |
| 470 do { \ |
| 471 static constexpr char TestString[] = \ |
| 472 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type \ |
| 473 ")"; \ |
| 474 const uint32_t T0 = allocateDqword(); \ |
| 475 const Dqword V0 Value0; \ |
| 476 const uint32_t T1 = allocateDqword(); \ |
| 477 const Dqword V1 Value1; \ |
| 478 \ |
| 479 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 480 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 481 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 482 \ |
| 483 AssembledTest test = assemble(); \ |
| 484 test.setDqwordTo(T0, V0); \ |
| 485 test.setDqwordTo(T1, V1); \ |
| 486 test.run(); \ |
| 487 \ |
| 488 ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString; \ |
| 489 \ |
| 490 reset(); \ |
| 491 } while (0) |
| 492 /* Typed memory-operand variant: Inst(IceType_f32, Dst, address of slot T1). */ |
| 493 #define TestArithPSXmmAddr(Dst, Value0, Value1, Inst, Op, Type) \ |
| 494 do { \ |
| 495 static constexpr char TestString[] = \ |
| 496 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \ |
| 497 ", " #Type ")"; \ |
| 498 const uint32_t T0 = allocateDqword(); \ |
| 499 const Dqword V0 Value0; \ |
| 500 const uint32_t T1 = allocateDqword(); \ |
| 501 const Dqword V1 Value1; \ |
| 502 \ |
| 503 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 504 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 505 \ |
| 506 AssembledTest test = assemble(); \ |
| 507 test.setDqwordTo(T0, V0); \ |
| 508 test.setDqwordTo(T1, V1); \ |
| 509 test.run(); \ |
| 510 \ |
| 511 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ |
| 512 \ |
| 513 reset(); \ |
| 514 } while (0) |
| 515 /* Full instruction list for one (Dst, Src) pair. orps, orpd and the min/max instructions are exercised in the xmm,xmm form only; the others also get a memory-operand case. */ |
| 516 #define TestArithPS(Dst, Src) \ |
| 517 do { \ |
| 518 TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 519 (0.55, 0.43, 0.23, 1.21), addps, +, float); \ |
| 520 TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \ |
| 521 (0.55, 0.43, 0.23, 1.21), addps, +, float); \ |
| 522 TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 523 (0.55, 0.43, 0.23, 1.21), subps, -, float); \ |
| 524 TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \ |
| 525 (0.55, 0.43, 0.23, 1.21), subps, -, float); \ |
| 526 TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 527 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \ |
| 528 TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \ |
| 529 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \ |
| 530 TestArithPSXmmXmm(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 531 (0.55, 0.43, 0.23, 1.21), divps, /, float); \ |
| 532 TestArithPSXmmAddr(Dst, (1.0, 100.0, -1000.0, 20.0), \ |
| 533 (0.55, 0.43, 0.23, 1.21), divps, /, float); \ |
| 534 TestArithPSXmmXmmUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 535 (0.55, 0.43, 0.23, 1.21), andps, &, float); \ |
| 536 TestArithPSXmmAddrUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), \ |
| 537 (0.55, 0.43, 0.23, 1.21), andps, &, float); \ |
| 538 TestArithPSXmmXmmUntyped(Dst, (1.0, -1000.0), Src, (0.55, 1.21), andpd, &, \ |
| 539 double); \ |
| 540 TestArithPSXmmAddrUntyped(Dst, (1.0, -1000.0), (0.55, 1.21), andpd, &, \ |
| 541 double); \ |
| 542 TestArithPSXmmXmmUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 543 (0.55, 0.43, 0.23, 1.21), orps, |, float); \ |
| 544 TestArithPSXmmXmmUntyped(Dst, (1.0, -1000.0), Src, (0.55, 1.21), orpd, |, \ |
| 545 double); \ |
| 546 TestMinMaxPS(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 547 (0.55, 0.43, 0.23, 1.21), minps, float); \ |
| 548 TestMinMaxPS(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 549 (0.55, 0.43, 0.23, 1.21), maxps, float); \ |
| 550 TestMinMaxPS(Dst, (1.0, -1000.0), Src, (0.55, 1.21), minpd, double); \ |
| 551 TestMinMaxPS(Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxpd, double); \ |
| 552 TestArithPSXmmXmmUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), Src, \ |
| 553 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \ |
| 554 TestArithPSXmmAddrUntyped(Dst, (1.0, 100.0, -1000.0, 20.0), \ |
| 555 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \ |
| 556 TestArithPSXmmXmmUntyped(Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorpd, ^, \ |
| 557 double); \ |
| 558 TestArithPSXmmAddrUntyped(Dst, (1.0, -1000.0), (0.55, 1.21), xorpd, ^, \ |
| 559 double); \ |
| 560 } while (0) |
| 561 /* Adjacent (Dst, Src) register pairs, wrapping from xmm15 back to xmm0. */ |
| 562 TestArithPS(xmm0, xmm1); |
| 563 TestArithPS(xmm1, xmm2); |
| 564 TestArithPS(xmm2, xmm3); |
| 565 TestArithPS(xmm3, xmm4); |
| 566 TestArithPS(xmm4, xmm5); |
| 567 TestArithPS(xmm5, xmm6); |
| 568 TestArithPS(xmm6, xmm7); |
| 569 TestArithPS(xmm7, xmm8); |
| 570 TestArithPS(xmm8, xmm9); |
| 571 TestArithPS(xmm9, xmm10); |
| 572 TestArithPS(xmm10, xmm11); |
| 573 TestArithPS(xmm11, xmm12); |
| 574 TestArithPS(xmm12, xmm13); |
| 575 TestArithPS(xmm13, xmm14); |
| 576 TestArithPS(xmm14, xmm15); |
| 577 TestArithPS(xmm15, xmm0); |
| 578 #undef TestArithPSXmmAddrUntyped /* Every helper macro defined in this test is undefined below; macro names are case-sensitive. */ |
| 579 #undef TestArithPS |
| 580 #undef TestMinMaxPS |
| 581 #undef TestArithPSXmmXmmUntyped |
| 582 #undef TestArithPSXmmAddr |
| 583 #undef TestArithPSXmmXmm |
| 584 } |
| 585 |
| 586 TEST_F(AssemblerX8664Test, Blending) { /* Checks blendvps (as f32 lanes) and pblendvb (as i8 lanes) in xmm,xmm and xmm,memory forms; the mask is always loaded into xmm0 first. */ |
| 587 using f32 = float; |
| 588 using i8 = uint8_t; |
| 589 /* The aliases above let one macro argument (f32 or i8) serve both as the IceType_ suffix and as the packedAs<> template argument. */ |
| 590 #define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type) \ |
| 591 do { \ |
| 592 static constexpr char TestString[] = \ |
| 593 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst \ |
| 594 ", " #Type ")"; \ |
| 595 const uint32_t T0 = allocateDqword(); \ |
| 596 const Dqword V0 Value0; \ |
| 597 const uint32_t T1 = allocateDqword(); \ |
| 598 const Dqword V1 Value1; \ |
| 599 const uint32_t Mask = allocateDqword(); \ |
| 600 const Dqword MaskValue M; \ |
| 601 /* Emit: load the mask into xmm0, then Dst and Src, then Inst(IceType_<Type>, Dst, Src). */ \ |
| 602 __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \ |
| 603 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 604 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 605 __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 606 \ |
| 607 AssembledTest test = assemble(); \ |
| 608 test.setDqwordTo(T0, V0); \ |
| 609 test.setDqwordTo(T1, V1); \ |
| 610 test.setDqwordTo(Mask, MaskValue); \ |
| 611 test.run(); \ |
| 612 /* The expected value is produced by blendWith(V1, MaskValue) on V0 viewed as lanes of Type. */ \ |
| 613 ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \ |
| 614 << TestString; \ |
| 615 reset(); \ |
| 616 } while (0) |
| 617 /* Same check as TestBlendingXmmXmm, except the second operand is read from memory slot T1 instead of a Src register. */ |
| 618 #define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type) \ |
| 619 do { \ |
| 620 static constexpr char TestString[] = \ |
| 621 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \ |
| 622 ")"; \ |
| 623 const uint32_t T0 = allocateDqword(); \ |
| 624 const Dqword V0 Value0; \ |
| 625 const uint32_t T1 = allocateDqword(); \ |
| 626 const Dqword V1 Value1; \ |
| 627 const uint32_t Mask = allocateDqword(); \ |
| 628 const Dqword MaskValue M; \ |
| 629 \ |
| 630 __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \ |
| 631 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 632 __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 633 \ |
| 634 AssembledTest test = assemble(); \ |
| 635 test.setDqwordTo(T0, V0); \ |
| 636 test.setDqwordTo(T1, V1); \ |
| 637 test.setDqwordTo(Mask, MaskValue); \ |
| 638 test.run(); \ |
| 639 \ |
| 640 ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \ |
| 641 << TestString; \ |
| 642 reset(); \ |
| 643 } while (0) |
| 644 /* Runs blendvps and pblendvb in both operand forms for one register pair. Note the parameter order is (Src, Dst). */ |
| 645 #define TestBlending(Src, Dst) \ |
| 646 do { \ |
| 647 TestBlendingXmmXmm( \ |
| 648 Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0), \ |
| 649 (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \ |
| 650 blendvps, f32); \ |
| 651 TestBlendingXmmAddr( \ |
| 652 Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0), \ |
| 653 (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \ |
| 654 blendvps, f32); \ |
| 655 TestBlendingXmmXmm( \ |
| 656 Dst, \ |
| 657 (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \ |
| 658 Src, \ |
| 659 (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \ |
| 660 (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \ |
| 661 pblendvb, i8); \ |
| 662 TestBlendingXmmAddr( \ |
| 663 Dst, \ |
| 664 (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \ |
| 665 (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \ |
| 666 (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \ |
| 667 pblendvb, i8); \ |
| 668 } while (0) |
| 669 |
| 670 /* xmm0 is reserved: it holds the implicit mask operand, so the register pairs below start at xmm1 and wrap from xmm15 back to xmm1. */ |
| 671 TestBlending(xmm1, xmm2); |
| 672 TestBlending(xmm2, xmm3); |
| 673 TestBlending(xmm3, xmm4); |
| 674 TestBlending(xmm4, xmm5); |
| 675 TestBlending(xmm5, xmm6); |
| 676 TestBlending(xmm6, xmm7); |
| 677 TestBlending(xmm7, xmm8); |
| 678 TestBlending(xmm8, xmm9); |
| 679 TestBlending(xmm9, xmm10); |
| 680 TestBlending(xmm10, xmm11); |
| 681 TestBlending(xmm11, xmm12); |
| 682 TestBlending(xmm12, xmm13); |
| 683 TestBlending(xmm13, xmm14); |
| 684 TestBlending(xmm14, xmm15); |
| 685 TestBlending(xmm15, xmm1); |
| 686 /* Remove the helper macro definitions introduced by this test. */ |
| 687 #undef TestBlending |
| 688 #undef TestBlendingXmmAddr |
| 689 #undef TestBlendingXmmXmm |
| 690 } |
| 691 |
| 692 TEST_F(AssemblerX8664Test, Cmpps) { /* Checks cmpps in xmm,xmm and xmm,memory forms, using the eq predicate on identical inputs and the unord predicate on inputs containing quiet NaNs. */ |
| 693 #define TestCmppsXmmXmm(Dst, Src, C, Op) \ |
| 694 do { \ |
| 695 static constexpr char TestString[] = \ |
| 696 "(" #Src ", " #Dst ", " #C ", " #Op ")"; \ |
| 697 const uint32_t T0 = allocateDqword(); \ |
| 698 const Dqword V0(-1.0, 1.0, 3.14, 1024.5); \ |
| 699 const uint32_t T1 = allocateDqword(); \ |
| 700 const Dqword V1(-1.0, 1.0, 3.14, 1024.5); \ |
| 701 /* V0 and V1 hold the same four values. Emit: load Dst and Src, then cmpps with predicate Cond::Cmpps_<C>. */ \ |
| 702 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 703 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 704 __ cmpps(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Cond::Cmpps_##C); \ |
| 705 \ |
| 706 AssembledTest test = assemble(); \ |
| 707 test.setDqwordTo(T0, V0); \ |
| 708 test.setDqwordTo(T1, V1); \ |
| 709 test.run(); \ |
| 710 /* The expected value is V0 viewed as float lanes, compared with V1 through Op. */ \ |
| 711 ASSERT_EQ(packedAs<float>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ |
| 712 ; \ |
| 713 reset(); \ |
| 714 } while (0) |
| 715 /* Same check as TestCmppsXmmXmm, except the second operand is read from memory slot T1 instead of a Src register. */ |
| 716 #define TestCmppsXmmAddr(Dst, C, Op) \ |
| 717 do { \ |
| 718 static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")"; \ |
| 719 const uint32_t T0 = allocateDqword(); \ |
| 720 const Dqword V0(-1.0, 1.0, 3.14, 1024.5); \ |
| 721 const uint32_t T1 = allocateDqword(); \ |
| 722 const Dqword V1(-1.0, 1.0, 3.14, 1024.5); \ |
| 723 \ |
| 724 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 725 __ cmpps(Encoded_Xmm_##Dst(), dwordAddress(T1), Cond::Cmpps_##C); \ |
| 726 \ |
| 727 AssembledTest test = assemble(); \ |
| 728 test.setDqwordTo(T0, V0); \ |
| 729 test.setDqwordTo(T1, V1); \ |
| 730 test.run(); \ |
| 731 \ |
| 732 ASSERT_EQ(packedAs<float>(V0) Op V1, test.Dst<Dqword>()) << TestString; \ |
| 733 ; \ |
| 734 reset(); \ |
| 735 } while (0) |
| 736 /* Ordered/unordered variant: the inputs cover the four lane combinations (number, number), (number, NaN), (NaN, number), (NaN, NaN), and the expected value comes from a method named after the predicate, packedAs<float>(V0).C(V1). */ |
| 737 #define TestCmppsOrdUnordXmmXmm(Dst, Src, C) \ |
| 738 do { \ |
| 739 static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")"; \ |
| 740 const uint32_t T0 = allocateDqword(); \ |
| 741 const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ |
| 742 std::numeric_limits<float>::quiet_NaN()); \ |
| 743 const uint32_t T1 = allocateDqword(); \ |
| 744 const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ |
| 745 std::numeric_limits<float>::quiet_NaN()); \ |
| 746 \ |
| 747 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 748 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 749 __ cmpps(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Cond::Cmpps_##C); \ |
| 750 \ |
| 751 AssembledTest test = assemble(); \ |
| 752 test.setDqwordTo(T0, V0); \ |
| 753 test.setDqwordTo(T1, V1); \ |
| 754 test.run(); \ |
| 755 \ |
| 756 ASSERT_EQ(packedAs<float>(V0).C(V1), test.Dst<Dqword>()) << TestString; \ |
| 757 ; \ |
| 758 reset(); \ |
| 759 } while (0) |
| 760 /* Memory-operand variant of TestCmppsOrdUnordXmmXmm: the second operand is read from slot T1. */ |
| 761 #define TestCmppsOrdUnordXmmAddr(Dst, C) \ |
| 762 do { \ |
| 763 static constexpr char TestString[] = "(" #Dst ", " #C ")"; \ |
| 764 const uint32_t T0 = allocateDqword(); \ |
| 765 const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \ |
| 766 std::numeric_limits<float>::quiet_NaN()); \ |
| 767 const uint32_t T1 = allocateDqword(); \ |
| 768 const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \ |
| 769 std::numeric_limits<float>::quiet_NaN()); \ |
| 770 \ |
| 771 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 772 __ cmpps(Encoded_Xmm_##Dst(), dwordAddress(T1), Cond::Cmpps_##C); \ |
| 773 \ |
| 774 AssembledTest test = assemble(); \ |
| 775 test.setDqwordTo(T0, V0); \ |
| 776 test.setDqwordTo(T1, V1); \ |
| 777 test.run(); \ |
| 778 \ |
| 779 ASSERT_EQ(packedAs<float>(V0).C(V1), test.Dst<Dqword>()) << TestString; \ |
| 780 ; \ |
| 781 reset(); \ |
| 782 } while (0) |
| 783 /* NOTE(review): every TestCmppsXmm* call below passes the eq predicate and the only other predicate used is unord, so the list repeats the same two checks; any other Cond::Cmpps_ predicates are not exercised here - confirm whether that is intended. */ |
| 784 #define TestCmpps(Dst, Src) \ |
| 785 do { \ |
| 786 TestCmppsXmmXmm(Dst, Src, eq, == ); \ |
| 787 TestCmppsXmmAddr(Dst, eq, == ); \ |
| 788 TestCmppsXmmXmm(Dst, Src, eq, == ); \ |
| 789 TestCmppsXmmAddr(Dst, eq, == ); \ |
| 790 TestCmppsXmmXmm(Dst, Src, eq, == ); \ |
| 791 TestCmppsXmmAddr(Dst, eq, == ); \ |
| 792 TestCmppsOrdUnordXmmXmm(Dst, Src, unord); \ |
| 793 TestCmppsOrdUnordXmmAddr(Dst, unord); \ |
| 794 TestCmppsXmmXmm(Dst, Src, eq, == ); \ |
| 795 TestCmppsXmmAddr(Dst, eq, == ); \ |
| 796 TestCmppsXmmXmm(Dst, Src, eq, == ); \ |
| 797 TestCmppsXmmAddr(Dst, eq, == ); \ |
| 798 TestCmppsXmmXmm(Dst, Src, eq, == ); \ |
| 799 TestCmppsXmmAddr(Dst, eq, == ); \ |
| 800 TestCmppsOrdUnordXmmXmm(Dst, Src, unord); \ |
| 801 TestCmppsOrdUnordXmmAddr(Dst, unord); \ |
| 802 } while (0) |
| 803 /* Adjacent (Dst, Src) register pairs, wrapping from xmm15 back to xmm0. */ |
| 804 TestCmpps(xmm0, xmm1); |
| 805 TestCmpps(xmm1, xmm2); |
| 806 TestCmpps(xmm2, xmm3); |
| 807 TestCmpps(xmm3, xmm4); |
| 808 TestCmpps(xmm4, xmm5); |
| 809 TestCmpps(xmm5, xmm6); |
| 810 TestCmpps(xmm6, xmm7); |
| 811 TestCmpps(xmm7, xmm8); |
| 812 TestCmpps(xmm8, xmm9); |
| 813 TestCmpps(xmm9, xmm10); |
| 814 TestCmpps(xmm10, xmm11); |
| 815 TestCmpps(xmm11, xmm12); |
| 816 TestCmpps(xmm12, xmm13); |
| 817 TestCmpps(xmm13, xmm14); |
| 818 TestCmpps(xmm14, xmm15); |
| 819 TestCmpps(xmm15, xmm0); |
| 820 /* Remove the helper macro definitions introduced by this test. */ |
| 821 #undef TestCmpps |
| 822 #undef TestCmppsOrdUnordXmmAddr |
| 823 #undef TestCmppsOrdUnordXmmXmm |
| 824 #undef TestCmppsXmmAddr |
| 825 #undef TestCmppsXmmXmm |
| 826 } |
| 827 |
| 828 TEST_F(AssemblerX8664Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) { |
| 829 #define TestImplSingle(Dst, Inst, Expect) \ |
| 830 do { \ |
| 831 static constexpr char TestString[] = "(" #Dst ", " #Inst ")"; \ |
| 832 const uint32_t T0 = allocateDqword(); \ |
| 833 const Dqword V0(1.0, 4.0, 20.0, 3.14); \ |
| 834 \ |
| 835 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 836 __ Inst(Encoded_Xmm_##Dst()); \ |
| 837 \ |
| 838 AssembledTest test = assemble(); \ |
| 839 test.setDqwordTo(T0, V0); \ |
| 840 test.run(); \ |
| 841 ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString; \ |
| 842 reset(); \ |
| 843 } while (0) |
| 844 |
| 845 #define TestImpl(Dst) \ |
| 846 do { \ |
| 847 TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull), \ |
| 848 uint64_t(0x3FE2D10B408F1BBDull))); \ |
| 849 TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull), \ |
| 850 uint64_t(0x3F1078003E64F000ull))); \ |
| 851 TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull), \ |
| 852 uint64_t(0x3EA310003D4CC000ull))); \ |
| 853 \ |
| 854 TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull), \ |
| 855 uint64_t(0x401C42FAE40282A8ull))); \ |
| 856 } while (0) |
| 857 |
| 858 TestImpl(xmm0); |
| 859 TestImpl(xmm1); |
| 860 TestImpl(xmm2); |
| 861 TestImpl(xmm3); |
| 862 TestImpl(xmm4); |
| 863 TestImpl(xmm5); |
| 864 TestImpl(xmm6); |
| 865 TestImpl(xmm7); |
| 866 TestImpl(xmm8); |
| 867 TestImpl(xmm9); |
| 868 TestImpl(xmm10); |
| 869 TestImpl(xmm11); |
| 870 TestImpl(xmm12); |
| 871 TestImpl(xmm13); |
| 872 TestImpl(xmm14); |
| 873 TestImpl(xmm15); |
| 874 |
| 875 #undef TestImpl |
| 876 #undef TestImplSingle |
| 877 } |
| 878 |
| 879 TEST_F(AssemblerX8664Test, Unpck) { |
| 880 const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull), |
| 881 uint64_t(0xCCCCCCCCDDDDDDDDull)); |
| 882 const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull), |
| 883 uint64_t(0x9999999988888888ull)); |
| 884 |
| 885 const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull), |
| 886 uint64_t(0xEEEEEEEEAAAAAAAAull)); |
| 887 const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull), |
| 888 uint64_t(0xEEEEEEEEFFFFFFFFull)); |
| 889 const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull), |
| 890 uint64_t(0x99999999CCCCCCCCull)); |
| 891 const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull), |
| 892 uint64_t(0x9999999988888888ull)); |
| 893 |
| 894 #define TestImplSingle(Dst, Src, Inst) \ |
| 895 do { \ |
| 896 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ |
| 897 const uint32_t T0 = allocateDqword(); \ |
| 898 const uint32_t T1 = allocateDqword(); \ |
| 899 \ |
| 900 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 901 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 902 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 903 \ |
| 904 AssembledTest test = assemble(); \ |
| 905 test.setDqwordTo(T0, V0); \ |
| 906 test.setDqwordTo(T1, V1); \ |
| 907 test.run(); \ |
| 908 \ |
| 909 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ |
| 910 reset(); \ |
| 911 } while (0) |
| 912 |
| 913 #define TestImpl(Dst, Src) \ |
| 914 do { \ |
| 915 TestImplSingle(Dst, Src, unpcklps); \ |
| 916 TestImplSingle(Dst, Src, unpcklpd); \ |
| 917 TestImplSingle(Dst, Src, unpckhps); \ |
| 918 TestImplSingle(Dst, Src, unpckhpd); \ |
| 919 } while (0) |
| 920 |
| 921 TestImpl(xmm0, xmm1); |
| 922 TestImpl(xmm1, xmm2); |
| 923 TestImpl(xmm2, xmm3); |
| 924 TestImpl(xmm3, xmm4); |
| 925 TestImpl(xmm4, xmm5); |
| 926 TestImpl(xmm5, xmm6); |
| 927 TestImpl(xmm6, xmm7); |
| 928 TestImpl(xmm7, xmm8); |
| 929 TestImpl(xmm8, xmm9); |
| 930 TestImpl(xmm9, xmm10); |
| 931 TestImpl(xmm10, xmm11); |
| 932 TestImpl(xmm11, xmm12); |
| 933 TestImpl(xmm12, xmm13); |
| 934 TestImpl(xmm13, xmm14); |
| 935 TestImpl(xmm14, xmm15); |
| 936 TestImpl(xmm15, xmm0); |
| 937 |
| 938 #undef TestImpl |
| 939 #undef TestImplSingle |
| 940 } |
| 941 |
| 942 TEST_F(AssemblerX8664Test, Shufp) { |
| 943 const Dqword V0(uint64_t(0x1111111122222222ull), |
| 944 uint64_t(0x5555555577777777ull)); |
| 945 const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull), |
| 946 uint64_t(0xCCCCCCCCDDDDDDDDull)); |
| 947 |
| 948 const uint8_t pshufdImm = 0x63; |
| 949 const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull), |
| 950 uint64_t(0xAAAAAAAADDDDDDDDull)); |
| 951 |
| 952 const uint8_t shufpsImm = 0xf9; |
| 953 const Dqword shufpsExpected(uint64_t(0x7777777711111111ull), |
| 954 uint64_t(0xCCCCCCCCCCCCCCCCull)); |
| 955 |
| 956 #define TestImplSingleXmmXmm(Dst, Src, Inst) \ |
| 957 do { \ |
| 958 static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \ |
| 959 const uint32_t T0 = allocateDqword(); \ |
| 960 const uint32_t T1 = allocateDqword(); \ |
| 961 \ |
| 962 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 963 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 964 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ |
| 965 Immediate(Inst##Imm)); \ |
| 966 \ |
| 967 AssembledTest test = assemble(); \ |
| 968 test.setDqwordTo(T0, V0); \ |
| 969 test.setDqwordTo(T1, V1); \ |
| 970 test.run(); \ |
| 971 \ |
| 972 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ |
| 973 reset(); \ |
| 974 } while (0) |
| 975 |
| 976 #define TestImplSingleXmmAddr(Dst, Inst) \ |
| 977 do { \ |
| 978 static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \ |
| 979 const uint32_t T0 = allocateDqword(); \ |
| 980 const uint32_t T1 = allocateDqword(); \ |
| 981 \ |
| 982 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 983 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \ |
| 984 Immediate(Inst##Imm)); \ |
| 985 \ |
| 986 AssembledTest test = assemble(); \ |
| 987 test.setDqwordTo(T0, V0); \ |
| 988 test.setDqwordTo(T1, V1); \ |
| 989 test.run(); \ |
| 990 \ |
| 991 ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \ |
| 992 reset(); \ |
| 993 } while (0) |
| 994 |
| 995 #define TestImplSingleXmmXmmUntyped(Dst, Src, Inst) \ |
| 996 do { \ |
| 997 static constexpr char TestString[] = \ |
| 998 "(" #Dst ", " #Src ", " #Inst ", Untyped)"; \ |
| 999 const uint32_t T0 = allocateDqword(); \ |
| 1000 const uint32_t T1 = allocateDqword(); \ |
| 1001 \ |
| 1002 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1003 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 1004 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Immediate(Inst##Imm)); \ |
| 1005 \ |
| 1006 AssembledTest test = assemble(); \ |
| 1007 test.setDqwordTo(T0, V0); \ |
| 1008 test.setDqwordTo(T1, V1); \ |
| 1009 test.run(); \ |
| 1010 \ |
| 1011 ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString; \ |
| 1012 reset(); \ |
| 1013 } while (0) |
| 1014 |
| 1015 #define TestImpl(Dst, Src) \ |
| 1016 do { \ |
| 1017 TestImplSingleXmmXmm(Dst, Src, pshufd); \ |
| 1018 TestImplSingleXmmAddr(Dst, pshufd); \ |
| 1019 TestImplSingleXmmXmm(Dst, Src, shufps); \ |
| 1020 TestImplSingleXmmAddr(Dst, shufps); \ |
| 1021 } while (0) |
| 1022 |
| 1023 TestImpl(xmm0, xmm1); |
| 1024 TestImpl(xmm1, xmm2); |
| 1025 TestImpl(xmm2, xmm3); |
| 1026 TestImpl(xmm3, xmm4); |
| 1027 TestImpl(xmm4, xmm5); |
| 1028 TestImpl(xmm5, xmm6); |
| 1029 TestImpl(xmm6, xmm7); |
| 1030 TestImpl(xmm7, xmm8); |
| 1031 TestImpl(xmm8, xmm9); |
| 1032 TestImpl(xmm9, xmm10); |
| 1033 TestImpl(xmm10, xmm11); |
| 1034 TestImpl(xmm11, xmm12); |
| 1035 TestImpl(xmm12, xmm13); |
| 1036 TestImpl(xmm13, xmm14); |
| 1037 TestImpl(xmm14, xmm15); |
| 1038 TestImpl(xmm15, xmm0); |
| 1039 |
| 1040 #undef TestImpl |
| 1041 #undef TestImplSingleXmmXmmUntyped |
| 1042 #undef TestImplSingleXmmAddr |
| 1043 #undef TestImplSingleXmmXmm |
| 1044 } |
| 1045 |
| 1046 TEST_F(AssemblerX8664Test, Cvt) { |
| 1047 const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); |
| 1048 const Dqword dq2ps32SrcValue(-5, 3, 100, 200); |
| 1049 const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0); |
| 1050 |
| 1051 const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f); |
| 1052 const Dqword dq2ps64SrcValue(-5, 3, 100, 200); |
| 1053 const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0); |
| 1054 |
| 1055 const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); |
| 1056 const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0); |
| 1057 const Dqword tps2dq32Expected(-5, 3, 100, 200); |
| 1058 |
| 1059 const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f); |
| 1060 const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0); |
| 1061 const Dqword tps2dq64Expected(-5, 3, 100, 200); |
| 1062 |
| 1063 const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f); |
| 1064 const int32_t si2ss32SrcValue = 5; |
| 1065 const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f); |
| 1066 |
| 1067 const Dqword si2ss64DstValue(-1.0, -1.0); |
| 1068 const int32_t si2ss64SrcValue = 5; |
| 1069 const Dqword si2ss64Expected(5.0, -1.0); |
| 1070 |
| 1071 const int32_t tss2si32DstValue = 0xF00F0FF0; |
| 1072 const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f); |
| 1073 const int32_t tss2si32Expected = -5; |
| 1074 |
| 1075 const int32_t tss2si64DstValue = 0xF00F0FF0; |
| 1076 const Dqword tss2si64SrcValue(-5.0, -1.0); |
| 1077 const int32_t tss2si64Expected = -5; |
| 1078 |
| 1079 const Dqword float2float32DstValue(-1.0, -1.0); |
| 1080 const Dqword float2float32SrcValue(-5.0, 3, 100, 200); |
| 1081 const Dqword float2float32Expected(-5.0, -1.0); |
| 1082 |
| 1083 const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0); |
| 1084 const Dqword float2float64SrcValue(-5.0, 3.0); |
| 1085 const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0); |
| 1086 |
| 1087 #define TestImplPXmmXmm(Dst, Src, Inst, Size) \ |
| 1088 do { \ |
| 1089 static constexpr char TestString[] = \ |
| 1090 "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")"; \ |
| 1091 const uint32_t T0 = allocateDqword(); \ |
| 1092 const uint32_t T1 = allocateDqword(); \ |
| 1093 \ |
| 1094 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1095 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 1096 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 1097 \ |
| 1098 AssembledTest test = assemble(); \ |
| 1099 test.setDqwordTo(T0, Inst##Size##DstValue); \ |
| 1100 test.setDqwordTo(T1, Inst##Size##SrcValue); \ |
| 1101 test.run(); \ |
| 1102 \ |
| 1103 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ |
| 1104 reset(); \ |
| 1105 } while (0) |
| 1106 |
| 1107 #define TestImplSXmmReg(Dst, GPR, Inst, Size) \ |
| 1108 do { \ |
| 1109 static constexpr char TestString[] = \ |
| 1110 "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")"; \ |
| 1111 const uint32_t T0 = allocateDqword(); \ |
| 1112 \ |
| 1113 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1114 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \ |
| 1115 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR()); \ |
| 1116 \ |
| 1117 AssembledTest test = assemble(); \ |
| 1118 test.setDqwordTo(T0, Inst##Size##DstValue); \ |
| 1119 test.run(); \ |
| 1120 \ |
| 1121 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ |
| 1122 reset(); \ |
| 1123 } while (0) |
| 1124 |
| 1125 #define TestImplSRegXmm(GPR, Src, Inst, Size) \ |
| 1126 do { \ |
| 1127 static constexpr char TestString[] = \ |
| 1128 "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")"; \ |
| 1129 const uint32_t T0 = allocateDqword(); \ |
| 1130 \ |
| 1131 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \ |
| 1132 __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \ |
| 1133 __ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src()); \ |
| 1134 \ |
| 1135 AssembledTest test = assemble(); \ |
| 1136 test.setDqwordTo(T0, Inst##Size##SrcValue); \ |
| 1137 test.run(); \ |
| 1138 \ |
| 1139 ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \ |
| 1140 << TestString; \ |
| 1141 reset(); \ |
| 1142 } while (0) |
| 1143 |
| 1144 #define TestImplPXmmAddr(Dst, Inst, Size) \ |
| 1145 do { \ |
| 1146 static constexpr char TestString[] = \ |
| 1147 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \ |
| 1148 const uint32_t T0 = allocateDqword(); \ |
| 1149 const uint32_t T1 = allocateDqword(); \ |
| 1150 \ |
| 1151 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1152 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 1153 \ |
| 1154 AssembledTest test = assemble(); \ |
| 1155 test.setDqwordTo(T0, Inst##Size##DstValue); \ |
| 1156 test.setDqwordTo(T1, Inst##Size##SrcValue); \ |
| 1157 test.run(); \ |
| 1158 \ |
| 1159 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ |
| 1160 reset(); \ |
| 1161 } while (0) |
| 1162 |
| 1163 #define TestImplSXmmAddr(Dst, Inst, Size) \ |
| 1164 do { \ |
| 1165 static constexpr char TestString[] = \ |
| 1166 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \ |
| 1167 const uint32_t T0 = allocateDqword(); \ |
| 1168 const uint32_t T1 = allocateDword(); \ |
| 1169 \ |
| 1170 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1171 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 1172 \ |
| 1173 AssembledTest test = assemble(); \ |
| 1174 test.setDqwordTo(T0, Inst##Size##DstValue); \ |
| 1175 test.setDwordTo(T1, Inst##Size##SrcValue); \ |
| 1176 test.run(); \ |
| 1177 \ |
| 1178 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \ |
| 1179 reset(); \ |
| 1180 } while (0) |
| 1181 |
| 1182 #define TestImplSRegAddr(GPR, Inst, Size) \ |
| 1183 do { \ |
| 1184 static constexpr char TestString[] = \ |
| 1185 "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")"; \ |
| 1186 const uint32_t T0 = allocateDqword(); \ |
| 1187 \ |
| 1188 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \ |
| 1189 __ cvt##Inst(IceType_f##Size, Encoded_GPR_##GPR(), dwordAddress(T0)); \ |
| 1190 \ |
| 1191 AssembledTest test = assemble(); \ |
| 1192 test.setDqwordTo(T0, Inst##Size##SrcValue); \ |
| 1193 test.run(); \ |
| 1194 \ |
| 1195 ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \ |
| 1196 << TestString; \ |
| 1197 reset(); \ |
| 1198 } while (0) |
| 1199 |
| 1200 #define TestImplSize(Dst, Src, GPR, Size) \ |
| 1201 do { \ |
| 1202 TestImplPXmmXmm(Dst, Src, dq2ps, Size); \ |
| 1203 TestImplPXmmAddr(Src, dq2ps, Size); \ |
| 1204 TestImplPXmmXmm(Dst, Src, tps2dq, Size); \ |
| 1205 TestImplPXmmAddr(Src, tps2dq, Size); \ |
| 1206 TestImplSXmmReg(Dst, GPR, si2ss, Size); \ |
| 1207 TestImplSXmmAddr(Dst, si2ss, Size); \ |
| 1208 TestImplSRegXmm(GPR, Src, tss2si, Size); \ |
| 1209 TestImplSRegAddr(GPR, tss2si, Size); \ |
| 1210 TestImplPXmmXmm(Dst, Src, float2float, Size); \ |
| 1211 TestImplPXmmAddr(Src, float2float, Size); \ |
| 1212 } while (0) |
| 1213 |
| 1214 #define TestImpl(Dst, Src, GPR) \ |
| 1215 do { \ |
| 1216 TestImplSize(Dst, Src, GPR, 32); \ |
| 1217 TestImplSize(Dst, Src, GPR, 64); \ |
| 1218 } while (0) |
| 1219 |
| 1220 TestImpl(xmm0, xmm1, r1); |
| 1221 TestImpl(xmm1, xmm2, r2); |
| 1222 TestImpl(xmm2, xmm3, r3); |
| 1223 TestImpl(xmm3, xmm4, r4); |
| 1224 TestImpl(xmm4, xmm5, r5); |
| 1225 TestImpl(xmm5, xmm6, r6); |
| 1226 TestImpl(xmm6, xmm7, r7); |
| 1227 TestImpl(xmm7, xmm8, r8); |
| 1228 TestImpl(xmm8, xmm9, r10); |
| 1229 TestImpl(xmm9, xmm10, r11); |
| 1230 TestImpl(xmm10, xmm11, r12); |
| 1231 TestImpl(xmm11, xmm12, r13); |
| 1232 TestImpl(xmm12, xmm13, r14); |
| 1233 TestImpl(xmm13, xmm14, r15); |
| 1234 TestImpl(xmm14, xmm15, r1); |
| 1235 TestImpl(xmm15, xmm0, r2); |
| 1236 |
| 1237 #undef TestImpl |
| 1238 #undef TestImplSize |
| 1239 #undef TestImplSRegAddr |
| 1240 #undef TestImplSXmmAddr |
| 1241 #undef TestImplPXmmAddr |
| 1242 #undef TestImplSRegXmm |
| 1243 #undef TestImplSXmmReg |
| 1244 #undef TestImplPXmmXmm |
| 1245 } |
| 1246 |
| 1247 TEST_F(AssemblerX8664Test, Ucomiss) { |
| 1248 static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN(); |
| 1249 static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN(); |
| 1250 |
| 1251 Dqword test32DstValue(0.0, qnan32, qnan32, qnan32); |
| 1252 Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32); |
| 1253 |
| 1254 Dqword test64DstValue(0.0, qnan64); |
| 1255 Dqword test64SrcValue(0.0, qnan64); |
| 1256 |
| 1257 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, \ |
| 1258 BOther) \ |
| 1259 do { \ |
| 1260 static constexpr char NearBranch = AssemblerX8664::kNearJump; \ |
| 1261 static constexpr char TestString[] = \ |
| 1262 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \ |
| 1263 ", " #BParity ", " #BOther ")"; \ |
| 1264 const uint32_t T0 = allocateDqword(); \ |
| 1265 test##Size##DstValue.F##Size[0] = Value0; \ |
| 1266 const uint32_t T1 = allocateDqword(); \ |
| 1267 test##Size##SrcValue.F##Size[0] = Value1; \ |
| 1268 const uint32_t ImmIfTrue = 0xBEEF; \ |
| 1269 const uint32_t ImmIfFalse = 0xC0FFE; \ |
| 1270 \ |
| 1271 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1272 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 1273 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \ |
| 1274 __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 1275 Label Done; \ |
| 1276 __ j(Cond::Br_##BParity, &Done, NearBranch); \ |
| 1277 __ j(Cond::Br_##BOther, &Done, NearBranch); \ |
| 1278 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \ |
| 1279 __ bind(&Done); \ |
| 1280 \ |
| 1281 AssembledTest test = assemble(); \ |
| 1282 test.setDqwordTo(T0, test##Size##DstValue); \ |
| 1283 test.setDqwordTo(T1, test##Size##SrcValue); \ |
| 1284 test.run(); \ |
| 1285 \ |
| 1286 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \ |
| 1287 reset(); \ |
| 1288 } while (0) |
| 1289 |
| 1290 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther) \ |
| 1291 do { \ |
| 1292 static constexpr char NearBranch = AssemblerX8664::kNearJump; \ |
| 1293 static constexpr char TestString[] = \ |
| 1294 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType \ |
| 1295 ", " #BParity ", " #BOther ")"; \ |
| 1296 const uint32_t T0 = allocateDqword(); \ |
| 1297 test##Size##DstValue.F##Size[0] = Value0; \ |
| 1298 const uint32_t T1 = allocateDqword(); \ |
| 1299 test##Size##SrcValue.F##Size[0] = Value1; \ |
| 1300 const uint32_t ImmIfTrue = 0xBEEF; \ |
| 1301 const uint32_t ImmIfFalse = 0xC0FFE; \ |
| 1302 \ |
| 1303 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1304 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \ |
| 1305 __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 1306 Label Done; \ |
| 1307 __ j(Cond::Br_##BParity, &Done, NearBranch); \ |
| 1308 __ j(Cond::Br_##BOther, &Done, NearBranch); \ |
| 1309 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \ |
| 1310 __ bind(&Done); \ |
| 1311 \ |
| 1312 AssembledTest test = assemble(); \ |
| 1313 test.setDqwordTo(T0, test##Size##DstValue); \ |
| 1314 test.setDqwordTo(T1, test##Size##SrcValue); \ |
| 1315 test.run(); \ |
| 1316 \ |
| 1317 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \ |
| 1318 reset(); \ |
| 1319 } while (0) |
| 1320 |
| 1321 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity, \ |
| 1322 BOther) \ |
| 1323 do { \ |
| 1324 TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \ |
| 1325 TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther); \ |
| 1326 } while (0) |
| 1327 |
| 1328 #define TestImplSize(Dst, Src, Size) \ |
| 1329 do { \ |
| 1330 TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne); \ |
| 1331 TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e); \ |
| 1332 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a); \ |
| 1333 TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a); \ |
| 1334 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae); \ |
| 1335 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b); \ |
| 1336 TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b); \ |
| 1337 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be); \ |
| 1338 TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o); \ |
| 1339 TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s); \ |
| 1340 TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s); \ |
| 1341 } while (0) |
| 1342 |
| 1343 #define TestImpl(Dst, Src) \ |
| 1344 do { \ |
| 1345 TestImplSize(Dst, Src, 32); \ |
| 1346 TestImplSize(Dst, Src, 64); \ |
| 1347 } while (0) |
| 1348 |
| 1349 TestImpl(xmm0, xmm1); |
| 1350 TestImpl(xmm1, xmm2); |
| 1351 TestImpl(xmm2, xmm3); |
| 1352 TestImpl(xmm3, xmm4); |
| 1353 TestImpl(xmm4, xmm5); |
| 1354 TestImpl(xmm5, xmm6); |
| 1355 TestImpl(xmm6, xmm7); |
| 1356 TestImpl(xmm7, xmm8); |
| 1357 TestImpl(xmm8, xmm9); |
| 1358 TestImpl(xmm9, xmm10); |
| 1359 TestImpl(xmm10, xmm11); |
| 1360 TestImpl(xmm11, xmm12); |
| 1361 TestImpl(xmm12, xmm13); |
| 1362 TestImpl(xmm13, xmm14); |
| 1363 TestImpl(xmm14, xmm15); |
| 1364 TestImpl(xmm15, xmm0); |
| 1365 |
| 1366 #undef TestImpl |
| 1367 #undef TestImplSize |
| 1368 #undef TestImplCond |
| 1369 #undef TestImplXmmAddr |
| 1370 #undef TestImplXmmXmm |
| 1371 } |
| 1372 |
| 1373 TEST_F(AssemblerX8664Test, Sqrtss) { |
| 1374 Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0); |
| 1375 Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0); |
| 1376 |
| 1377 Dqword test64SrcValue(-100.0, -100.0); |
| 1378 Dqword test64DstValue(-1.0, -1.0); |
| 1379 |
| 1380 #define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size) \ |
| 1381 do { \ |
| 1382 static constexpr char TestString[] = \ |
| 1383 "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")"; \ |
| 1384 const uint32_t T0 = allocateDqword(); \ |
| 1385 test##Size##SrcValue.F##Size[0] = Value1; \ |
| 1386 const uint32_t T1 = allocateDqword(); \ |
| 1387 \ |
| 1388 __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \ |
| 1389 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 1390 __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 1391 \ |
| 1392 AssembledTest test = assemble(); \ |
| 1393 test.setDqwordTo(T0, test##Size##SrcValue); \ |
| 1394 test.setDqwordTo(T1, test##Size##DstValue); \ |
| 1395 test.run(); \ |
| 1396 \ |
| 1397 Dqword Expected = test##Size##DstValue; \ |
| 1398 Expected.F##Size[0] = Result; \ |
| 1399 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ |
| 1400 reset(); \ |
| 1401 } while (0) |
| 1402 |
| 1403 #define TestSqrtssXmmAddr(Dst, Value1, Result, Size) \ |
| 1404 do { \ |
| 1405 static constexpr char TestString[] = \ |
| 1406 "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")"; \ |
| 1407 const uint32_t T0 = allocateDqword(); \ |
| 1408 test##Size##SrcValue.F##Size[0] = Value1; \ |
| 1409 const uint32_t T1 = allocateDqword(); \ |
| 1410 \ |
| 1411 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 1412 __ sqrtss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1413 \ |
| 1414 AssembledTest test = assemble(); \ |
| 1415 test.setDqwordTo(T0, test##Size##SrcValue); \ |
| 1416 test.setDqwordTo(T1, test##Size##DstValue); \ |
| 1417 test.run(); \ |
| 1418 \ |
| 1419 Dqword Expected = test##Size##DstValue; \ |
| 1420 Expected.F##Size[0] = Result; \ |
| 1421 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ |
| 1422 reset(); \ |
| 1423 } while (0) |
| 1424 |
| 1425 #define TestSqrtssSize(Dst, Src, Size) \ |
| 1426 do { \ |
| 1427 TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size); \ |
| 1428 TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size); \ |
| 1429 TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size); \ |
| 1430 TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size); \ |
| 1431 TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size); \ |
| 1432 TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size); \ |
| 1433 } while (0) |
| 1434 |
| 1435 #define TestSqrtss(Dst, Src) \ |
| 1436 do { \ |
| 1437 TestSqrtssSize(Dst, Src, 32); \ |
| 1438 TestSqrtssSize(Dst, Src, 64); \ |
| 1439 } while (0) |
| 1440 |
| 1441 TestSqrtss(xmm0, xmm1); |
| 1442 TestSqrtss(xmm1, xmm2); |
| 1443 TestSqrtss(xmm2, xmm3); |
| 1444 TestSqrtss(xmm3, xmm4); |
| 1445 TestSqrtss(xmm4, xmm5); |
| 1446 TestSqrtss(xmm5, xmm6); |
| 1447 TestSqrtss(xmm6, xmm7); |
| 1448 TestSqrtss(xmm7, xmm8); |
| 1449 TestSqrtss(xmm8, xmm9); |
| 1450 TestSqrtss(xmm9, xmm10); |
| 1451 TestSqrtss(xmm10, xmm11); |
| 1452 TestSqrtss(xmm11, xmm12); |
| 1453 TestSqrtss(xmm12, xmm13); |
| 1454 TestSqrtss(xmm13, xmm14); |
| 1455 TestSqrtss(xmm14, xmm15); |
| 1456 TestSqrtss(xmm15, xmm0); |
| 1457 |
| 1458 #undef TestSqrtss |
| 1459 #undef TestSqrtssSize |
| 1460 #undef TestSqrtssXmmAddr |
| 1461 #undef TestSqrtssXmmXmm |
| 1462 } |
| 1463 |
| 1464 TEST_F(AssemblerX8664Test, Insertps) { |
| 1465 #define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected) \ |
| 1466 do { \ |
| 1467 static constexpr char TestString[] = \ |
| 1468 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected \ |
| 1469 ")"; \ |
| 1470 const uint32_t T0 = allocateDqword(); \ |
| 1471 const Dqword V0 Value0; \ |
| 1472 const uint32_t T1 = allocateDqword(); \ |
| 1473 const Dqword V1 Value1; \ |
| 1474 \ |
| 1475 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1476 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 1477 __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ |
| 1478 Immediate(Imm)); \ |
| 1479 \ |
| 1480 AssembledTest test = assemble(); \ |
| 1481 test.setDqwordTo(T0, V0); \ |
| 1482 test.setDqwordTo(T1, V1); \ |
| 1483 test.run(); \ |
| 1484 \ |
| 1485 ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \ |
| 1486 reset(); \ |
| 1487 } while (0) |
| 1488 |
| 1489 #define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected) \ |
| 1490 do { \ |
| 1491 static constexpr char TestString[] = \ |
| 1492 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \ |
| 1493 const uint32_t T0 = allocateDqword(); \ |
| 1494 const Dqword V0 Value0; \ |
| 1495 const uint32_t T1 = allocateDqword(); \ |
| 1496 const Dqword V1 Value1; \ |
| 1497 \ |
| 1498 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1499 __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \ |
| 1500 Immediate(Imm)); \ |
| 1501 \ |
| 1502 AssembledTest test = assemble(); \ |
| 1503 test.setDqwordTo(T0, V0); \ |
| 1504 test.setDqwordTo(T1, V1); \ |
| 1505 test.run(); \ |
| 1506 \ |
| 1507 ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \ |
| 1508 reset(); \ |
| 1509 } while (0) |
| 1510 |
| 1511 #define TestInsertps(Dst, Src) \ |
| 1512 do { \ |
| 1513 TestInsertpsXmmXmmImm( \ |
| 1514 Dst, (uint64_t(-1), uint64_t(-1)), Src, \ |
| 1515 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ |
| 1516 0x99, \ |
| 1517 (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull))); \ |
| 1518 TestInsertpsXmmAddrImm( \ |
| 1519 Dst, (uint64_t(-1), uint64_t(-1)), \ |
| 1520 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ |
| 1521 0x99, \ |
| 1522 (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull))); \ |
| 1523 TestInsertpsXmmXmmImm( \ |
| 1524 Dst, (uint64_t(-1), uint64_t(-1)), Src, \ |
| 1525 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ |
| 1526 0x9D, \ |
| 1527 (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull))); \ |
| 1528 TestInsertpsXmmAddrImm( \ |
| 1529 Dst, (uint64_t(-1), uint64_t(-1)), \ |
| 1530 (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \ |
| 1531 0x9D, \ |
| 1532 (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull))); \ |
| 1533 } while (0) |
| 1534 |
| 1535 TestInsertps(xmm0, xmm1); |
| 1536 TestInsertps(xmm1, xmm2); |
| 1537 TestInsertps(xmm2, xmm3); |
| 1538 TestInsertps(xmm3, xmm4); |
| 1539 TestInsertps(xmm4, xmm5); |
| 1540 TestInsertps(xmm5, xmm6); |
| 1541 TestInsertps(xmm6, xmm7); |
| 1542 TestInsertps(xmm7, xmm8); |
| 1543 TestInsertps(xmm8, xmm9); |
| 1544 TestInsertps(xmm9, xmm10); |
| 1545 TestInsertps(xmm10, xmm11); |
| 1546 TestInsertps(xmm11, xmm12); |
| 1547 TestInsertps(xmm12, xmm13); |
| 1548 TestInsertps(xmm13, xmm14); |
| 1549 TestInsertps(xmm14, xmm15); |
| 1550 TestInsertps(xmm15, xmm0); |
| 1551 |
| 1552 #undef TestInsertps |
| 1553 #undef TestInsertpsXmmXmmAddr |
| 1554 #undef TestInsertpsXmmXmmImm |
| 1555 } |
| 1556 |
| 1557 TEST_F(AssemblerX8664Test, Pinsr) { |
| 1558 static constexpr uint8_t Mask32 = 0x03; |
| 1559 static constexpr uint8_t Mask16 = 0x07; |
| 1560 static constexpr uint8_t Mask8 = 0x0F; |
| 1561 |
| 1562 #define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size) \ |
| 1563 do { \ |
| 1564 static constexpr char TestString[] = \ |
| 1565 "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \ |
| 1566 const uint32_t T0 = allocateDqword(); \ |
| 1567 const Dqword V0 Value0; \ |
| 1568 \ |
| 1569 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1570 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Value1)); \ |
| 1571 __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR(), \ |
| 1572 Immediate(Imm)); \ |
| 1573 \ |
| 1574 AssembledTest test = assemble(); \ |
| 1575 test.setDqwordTo(T0, V0); \ |
| 1576 test.run(); \ |
| 1577 \ |
| 1578 constexpr uint8_t sel = (Imm)&Mask##Size; \ |
| 1579 Dqword Expected = V0; \ |
| 1580 Expected.U##Size[sel] = Value1; \ |
| 1581 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ |
| 1582 reset(); \ |
| 1583 } while (0) |
| 1584 |
| 1585 #define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size) \ |
| 1586 do { \ |
| 1587 static constexpr char TestString[] = \ |
| 1588 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")"; \ |
| 1589 const uint32_t T0 = allocateDqword(); \ |
| 1590 const Dqword V0 Value0; \ |
| 1591 const uint32_t T1 = allocateDword(); \ |
| 1592 const uint32_t V1 = Value1; \ |
| 1593 \ |
| 1594 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1595 __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1), \ |
| 1596 Immediate(Imm)); \ |
| 1597 \ |
| 1598 AssembledTest test = assemble(); \ |
| 1599 test.setDqwordTo(T0, V0); \ |
| 1600 test.setDwordTo(T1, V1); \ |
| 1601 test.run(); \ |
| 1602 \ |
| 1603 constexpr uint8_t sel = (Imm)&Mask##Size; \ |
| 1604 Dqword Expected = V0; \ |
| 1605 Expected.U##Size[sel] = Value1; \ |
| 1606 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ |
| 1607 reset(); \ |
| 1608 } while (0) |
| 1609 |
| 1610 #define TestPinsrSize(Dst, GPR, Value1, Imm, Size) \ |
| 1611 do { \ |
| 1612 TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \ |
| 1613 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ |
| 1614 GPR, Value1, Imm, Size); \ |
| 1615 TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull), \ |
| 1616 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ |
| 1617 Value1, Imm, Size); \ |
| 1618 } while (0) |
| 1619 |
| 1620 #define TestPinsr(Src, Dst) \ |
| 1621 do { \ |
| 1622 TestPinsrSize(Src, Dst, 0xEE, 0x03, 8); \ |
| 1623 TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16); \ |
| 1624 TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32); \ |
| 1625 } while (0) |
| 1626 |
| 1627 TestPinsr(xmm0, r1); |
| 1628 TestPinsr(xmm1, r2); |
| 1629 TestPinsr(xmm2, r3); |
| 1630 TestPinsr(xmm3, r4); |
| 1631 TestPinsr(xmm4, r5); |
| 1632 TestPinsr(xmm5, r6); |
| 1633 TestPinsr(xmm6, r7); |
| 1634 TestPinsr(xmm7, r8); |
| 1635 TestPinsr(xmm8, r10); |
| 1636 TestPinsr(xmm9, r11); |
| 1637 TestPinsr(xmm10, r12); |
| 1638 TestPinsr(xmm11, r13); |
| 1639 TestPinsr(xmm12, r14); |
| 1640 TestPinsr(xmm13, r15); |
| 1641 TestPinsr(xmm14, r1); |
| 1642 TestPinsr(xmm15, r2); |
| 1643 |
| 1644 #undef TestPinsr |
| 1645 #undef TestPinsrSize |
| 1646 #undef TestPinsrXmmAddrImm |
| 1647 #undef TestPinsrXmmGPRImm |
| 1648 } |
| 1649 |
| 1650 TEST_F(AssemblerX8664Test, Pextr) { |
| 1651 static constexpr uint8_t Mask32 = 0x03; |
| 1652 static constexpr uint8_t Mask16 = 0x07; |
| 1653 static constexpr uint8_t Mask8 = 0x0F; |
| 1654 |
| 1655 #define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size) \ |
| 1656 do { \ |
| 1657 static constexpr char TestString[] = \ |
| 1658 "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")"; \ |
| 1659 const uint32_t T0 = allocateDqword(); \ |
| 1660 const Dqword V0 Value1; \ |
| 1661 \ |
| 1662 __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \ |
| 1663 __ pextr(IceType_i##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src(), \ |
| 1664 Immediate(Imm)); \ |
| 1665 \ |
| 1666 AssembledTest test = assemble(); \ |
| 1667 test.setDqwordTo(T0, V0); \ |
| 1668 test.run(); \ |
| 1669 \ |
| 1670 constexpr uint8_t sel = (Imm)&Mask##Size; \ |
| 1671 ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString; \ |
| 1672 reset(); \ |
| 1673 } while (0) |
| 1674 |
| 1675 #define TestPextrSize(GPR, Src, Value1, Imm, Size) \ |
| 1676 do { \ |
| 1677 TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull), \ |
| 1678 uint64_t(0xFFFFFFFFDDDDDDDDull)), \ |
| 1679 Imm, Size); \ |
| 1680 } while (0) |
| 1681 |
| 1682 #define TestPextr(Src, Dst) \ |
| 1683 do { \ |
| 1684 TestPextrSize(Src, Dst, 0xEE, 0x03, 8); \ |
| 1685 TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16); \ |
| 1686 TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32); \ |
| 1687 } while (0) |
| 1688 |
| 1689 TestPextr(r1, xmm0); |
| 1690 TestPextr(r2, xmm1); |
| 1691 TestPextr(r3, xmm2); |
| 1692 TestPextr(r4, xmm3); |
| 1693 TestPextr(r5, xmm4); |
| 1694 TestPextr(r6, xmm5); |
| 1695 TestPextr(r7, xmm6); |
| 1696 TestPextr(r8, xmm7); |
| 1697 TestPextr(r10, xmm8); |
| 1698 TestPextr(r11, xmm9); |
| 1699 TestPextr(r12, xmm10); |
| 1700 TestPextr(r13, xmm11); |
| 1701 TestPextr(r14, xmm12); |
| 1702 TestPextr(r15, xmm13); |
| 1703 TestPextr(r1, xmm14); |
| 1704 TestPextr(r2, xmm15); |
| 1705 |
| 1706 #undef TestPextr |
| 1707 #undef TestPextrSize |
| 1708 #undef TestPextrXmmGPRImm |
| 1709 } |
| 1710 |
| 1711 TEST_F(AssemblerX8664Test, Pcmpeq_Pcmpgt) { |
| 1712 #define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op) \ |
| 1713 do { \ |
| 1714 static constexpr char TestString[] = \ |
| 1715 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")"; \ |
| 1716 const uint32_t T0 = allocateDqword(); \ |
| 1717 const Dqword V0 Value0; \ |
| 1718 const uint32_t T1 = allocateDqword(); \ |
| 1719 const Dqword V1 Value1; \ |
| 1720 \ |
| 1721 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1722 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 1723 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \ |
| 1724 \ |
| 1725 AssembledTest test = assemble(); \ |
| 1726 test.setDqwordTo(T0, V0); \ |
| 1727 test.setDqwordTo(T1, V1); \ |
| 1728 test.run(); \ |
| 1729 \ |
| 1730 Dqword Expected(uint64_t(0), uint64_t(0)); \ |
| 1731 static constexpr uint8_t ArraySize = \ |
| 1732 sizeof(Dqword) / sizeof(uint##Size##_t); \ |
| 1733 for (uint8_t i = 0; i < ArraySize; ++i) { \ |
| 1734 Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \ |
| 1735 } \ |
| 1736 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ |
| 1737 reset(); \ |
| 1738 } while (0) |
| 1739 |
| 1740 #define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op) \ |
| 1741 do { \ |
| 1742 static constexpr char TestString[] = \ |
| 1743 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")"; \ |
| 1744 const uint32_t T0 = allocateDqword(); \ |
| 1745 const Dqword V0 Value0; \ |
| 1746 const uint32_t T1 = allocateDqword(); \ |
| 1747 const Dqword V1 Value1; \ |
| 1748 \ |
| 1749 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1750 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \ |
| 1751 \ |
| 1752 AssembledTest test = assemble(); \ |
| 1753 test.setDqwordTo(T0, V0); \ |
| 1754 test.setDqwordTo(T1, V1); \ |
| 1755 test.run(); \ |
| 1756 \ |
| 1757 Dqword Expected(uint64_t(0), uint64_t(0)); \ |
| 1758 static constexpr uint8_t ArraySize = \ |
| 1759 sizeof(Dqword) / sizeof(uint##Size##_t); \ |
| 1760 for (uint8_t i = 0; i < ArraySize; ++i) { \ |
| 1761 Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \ |
| 1762 } \ |
| 1763 ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ |
| 1764 reset(); \ |
| 1765 } while (0) |
| 1766 |
| 1767 #define TestPcmpValues(Dst, Value0, Src, Value1, Size) \ |
| 1768 do { \ |
| 1769 TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == ); \ |
| 1770 TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == ); \ |
| 1771 TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < ); \ |
| 1772 TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < ); \ |
| 1773 } while (0) |
| 1774 |
| 1775 #define TestPcmpSize(Dst, Src, Size) \ |
| 1776 do { \ |
| 1777 TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull), \ |
| 1778 uint64_t(0x0000000000000000ull)), \ |
| 1779 Src, (uint64_t(0x0000008800008800ull), \ |
| 1780 uint64_t(0xFFFFFFFFFFFFFFFFull)), \ |
| 1781 Size); \ |
| 1782 TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull), \ |
| 1783 uint64_t(0x12345abcde12345Aull)), \ |
| 1784 Src, (uint64_t(0x0000008800008800ull), \ |
| 1785 uint64_t(0xAABBCCDD1234321Aull)), \ |
| 1786 Size); \ |
| 1787 } while (0) |
| 1788 |
| 1789 #define TestPcmp(Dst, Src) \ |
| 1790 do { \ |
| 1791 TestPcmpSize(xmm0, xmm1, 8); \ |
| 1792 TestPcmpSize(xmm0, xmm1, 16); \ |
| 1793 TestPcmpSize(xmm0, xmm1, 32); \ |
| 1794 } while (0) |
| 1795 |
| 1796 TestPcmp(xmm0, xmm1); |
| 1797 TestPcmp(xmm1, xmm2); |
| 1798 TestPcmp(xmm2, xmm3); |
| 1799 TestPcmp(xmm3, xmm4); |
| 1800 TestPcmp(xmm4, xmm5); |
| 1801 TestPcmp(xmm5, xmm6); |
| 1802 TestPcmp(xmm6, xmm7); |
| 1803 TestPcmp(xmm7, xmm8); |
| 1804 TestPcmp(xmm8, xmm9); |
| 1805 TestPcmp(xmm9, xmm10); |
| 1806 TestPcmp(xmm10, xmm11); |
| 1807 TestPcmp(xmm11, xmm12); |
| 1808 TestPcmp(xmm12, xmm13); |
| 1809 TestPcmp(xmm13, xmm14); |
| 1810 TestPcmp(xmm14, xmm15); |
| 1811 TestPcmp(xmm15, xmm0); |
| 1812 |
| 1813 #undef TestPcmp |
| 1814 #undef TestPcmpSize |
| 1815 #undef TestPcmpValues |
| 1816 #undef TestPcmpXmmAddr |
| 1817 #undef TestPcmpXmmXmm |
| 1818 } |
| 1819 |
| 1820 TEST_F(AssemblerX8664Test, Roundsd) { |
| 1821 #define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN) \ |
| 1822 do { \ |
| 1823 static constexpr char TestString[] = \ |
| 1824 "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")"; \ |
| 1825 const uint32_t T0 = allocateDqword(); \ |
| 1826 const Dqword V0(-3.0, -3.0); \ |
| 1827 const uint32_t T1 = allocateDqword(); \ |
| 1828 const Dqword V1(double(Input), -123.4); \ |
| 1829 \ |
| 1830 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ |
| 1831 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ |
| 1832 __ roundsd(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ |
| 1833 AssemblerX8664::k##Mode); \ |
| 1834 \ |
| 1835 AssembledTest test = assemble(); \ |
| 1836 test.setDqwordTo(T0, V0); \ |
| 1837 test.setDqwordTo(T1, V1); \ |
| 1838 test.run(); \ |
| 1839 \ |
| 1840 const Dqword Expected(double(RN), -3.0); \ |
| 1841 EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString; \ |
| 1842 reset(); \ |
| 1843 } while (0) |
| 1844 |
| 1845 #define TestRoundsd(Dst, Src) \ |
| 1846 do { \ |
| 1847 TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6); \ |
| 1848 TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5); \ |
| 1849 TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5); \ |
| 1850 TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6); \ |
| 1851 TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5); \ |
| 1852 TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5); \ |
| 1853 } while (0) |
| 1854 |
| 1855 TestRoundsd(xmm0, xmm1); |
| 1856 TestRoundsd(xmm1, xmm2); |
| 1857 TestRoundsd(xmm2, xmm3); |
| 1858 TestRoundsd(xmm3, xmm4); |
| 1859 TestRoundsd(xmm4, xmm5); |
| 1860 TestRoundsd(xmm5, xmm6); |
| 1861 TestRoundsd(xmm6, xmm7); |
| 1862 TestRoundsd(xmm7, xmm8); |
| 1863 TestRoundsd(xmm8, xmm9); |
| 1864 TestRoundsd(xmm9, xmm10); |
| 1865 TestRoundsd(xmm10, xmm11); |
| 1866 TestRoundsd(xmm11, xmm12); |
| 1867 TestRoundsd(xmm12, xmm13); |
| 1868 TestRoundsd(xmm13, xmm14); |
| 1869 TestRoundsd(xmm14, xmm15); |
| 1870 TestRoundsd(xmm15, xmm0); |
| 1871 |
| 1872 #undef TestRoundsd |
| 1873 #undef TestRoundsdXmmXmm |
| 1874 } |
| 1875 |
| 1876 TEST_F(AssemblerX8664Test, Set1ps) { |
| 1877 #define TestImpl(Xmm, Src, Imm) \ |
| 1878 do { \ |
| 1879 __ set1ps(Encoded_Xmm_##Xmm(), Encoded_GPR_##Src(), Immediate(Imm)); \ |
| 1880 \ |
| 1881 AssembledTest test = assemble(); \ |
| 1882 test.run(); \ |
| 1883 \ |
| 1884 const Dqword Expected((uint64_t(Imm) << 32) | uint32_t(Imm), \ |
| 1885 (uint64_t(Imm) << 32) | uint32_t(Imm)); \ |
| 1886 ASSERT_EQ(Expected, test.Xmm<Dqword>()) \ |
| 1887 << "(" #Xmm ", " #Src ", " #Imm ")"; \ |
| 1888 reset(); \ |
| 1889 } while (0) |
| 1890 |
| 1891 TestImpl(xmm0, r1, 1); |
| 1892 TestImpl(xmm1, r2, 12); |
| 1893 TestImpl(xmm2, r3, 22); |
| 1894 TestImpl(xmm3, r4, 54); |
| 1895 TestImpl(xmm4, r5, 80); |
| 1896 TestImpl(xmm5, r6, 32); |
| 1897 TestImpl(xmm6, r7, 55); |
| 1898 TestImpl(xmm7, r8, 44); |
| 1899 TestImpl(xmm8, r10, 10); |
| 1900 TestImpl(xmm9, r11, 155); |
| 1901 TestImpl(xmm10, r12, 165); |
| 1902 TestImpl(xmm11, r13, 170); |
| 1903 TestImpl(xmm12, r14, 200); |
| 1904 TestImpl(xmm13, r15, 124); |
| 1905 TestImpl(xmm14, r1, 101); |
| 1906 TestImpl(xmm15, r2, 166); |
| 1907 |
| 1908 #undef TestImpl |
| 1909 } |
| 1910 |
| 1911 } // end of anonymous namespace |
| 1912 } // end of namespace Test |
| 1913 } // end of namespace X8664 |
| 1914 } // end of namespace Ice |
| OLD | NEW |