OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include <stdio.h> |
| 6 #include <stdlib.h> |
| 7 #include <string.h> |
| 8 #include <cmath> |
| 9 #include <limits> |
| 10 |
| 11 #include "src/arm64/decoder-arm64-inl.h" |
| 12 #include "src/arm64/disasm-arm64.h" |
| 13 #include "src/arm64/simulator-arm64.h" |
| 14 #include "src/arm64/utils-arm64.h" |
| 15 #include "src/base/platform/platform.h" |
| 16 #include "src/base/utils/random-number-generator.h" |
| 17 #include "src/macro-assembler-inl.h" |
| 18 #include "test/cctest/cctest.h" |
| 19 #include "test/cctest/test-simulator-neon-inputs-arm64.h" |
| 20 #include "test/cctest/test-simulator-neon-traces-arm64.h" |
| 21 #include "test/cctest/test-utils-arm64.h" |
| 22 |
| 23 using namespace v8::internal; |
| 24 |
| 25 // Test infrastructure. |
| 26 // |
| 27 // Tests are functions which accept no parameters and have no return values. |
| 28 // The testing code should not perform an explicit return once completed. For |
| 29 // example to test the mov immediate instruction a very simple test would be: |
| 30 // |
| 31 // SIMTEST(mov_x0_one) { |
| 32 // SETUP(); |
| 33 // |
| 34 // START(); |
| 35 // __ mov(x0, Operand(1)); |
| 36 // END(); |
| 37 // |
| 38 // RUN(); |
| 39 // |
| 40 // CHECK_EQUAL_64(1, x0); |
| 41 // |
| 42 // TEARDOWN(); |
| 43 // } |
| 44 // |
| 45 // Within a START ... END block all registers but sp can be modified. sp has to |
| 46 // be explicitly saved/restored. The END() macro replaces the function return |
| 47 // so it may appear multiple times in a test if the test has multiple exit |
| 48 // points. |
| 49 // |
| 50 // Once the test has been run all integer and floating point registers as well |
| 51 // as flags are accessible through a RegisterDump instance, see |
| 52 // test-utils-arm64.h for more info on RegisterDump. |
| 53 // |
| 54 // We provide some helper asserts to handle common cases: |
| 55 // |
| 56 // CHECK_EQUAL_32(int32_t, int32_t) |
| 57 // CHECK_EQUAL_FP32(float, float) |
| 58 // CHECK_EQUAL_32(int32_t, W register) |
| 59 // CHECK_EQUAL_FP32(float, S register) |
| 60 // CHECK_EQUAL_64(int64_t, int64_t) |
| 61 // CHECK_EQUAL_FP64(double, double) |
| 62 // CHECK_EQUAL_64(int64_t, X register) |
| 63 // CHECK_EQUAL_64(X register, X register) |
| 64 // CHECK_EQUAL_FP64(double, D register) |
| 65 // |
| 66 // e.g. CHECK_EQUAL_FP64(0.5, d30); |
| 67 // |
| 68 // If more advanced computation is required before the assert then access the |
| 69 // RegisterDump named core directly: |
| 70 // |
| 71 // CHECK_EQUAL_64(0x1234, core.xreg(0) & 0xffff); |
| 72 |
| 73 #if 0 // TODO(all): enable. |
| 74 static v8::Persistent<v8::Context> env; |
| 75 |
| 76 static void InitializeVM() { |
| 77 if (env.IsEmpty()) { |
| 78 env = v8::Context::New(); |
| 79 } |
| 80 } |
| 81 #endif |
| 82 |
// '__' emits through the local 'masm'; SIMTEST(name) expands to
// TEST(SIM_name).
#define __ masm.
#define SIMTEST(name) TEST(SIM_##name)

// Default size, in bytes, of the code buffer handed to the MacroAssembler.
#define BUF_SIZE 8192
#define SETUP() SETUP_SIZE(BUF_SIZE)

#define INIT_V8() CcTest::InitializeVM();

#ifdef USE_SIMULATOR

// Run tests with the simulator.
//
// SETUP_SIZE declares the locals that the other macros refer to: 'isolate',
// 'scope', 'buf', 'masm', 'decoder', 'simulator' and 'core'. The isolate is
// checked before it is handed to the HandleScope, so the check is not
// bypassed by an earlier use of the pointer.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != nullptr);                                   \
  HandleScope scope(isolate);                                  \
  byte* buf = new byte[buf_size];                              \
  MacroAssembler masm(isolate, buf, buf_size,                  \
                      v8::internal::CodeObjectRequired::kYes); \
  Decoder<DispatchingDecoderVisitor>* decoder =                \
      new Decoder<DispatchingDecoderVisitor>();                \
  Simulator simulator(decoder);                                \
  RegisterDump core;

// Reset the assembler and simulator, so that instructions can be generated,
// but don't actually emit any code. This can be used by tests that need to
// emit instructions at the start of the buffer. Note that START_AFTER_RESET
// must be called before any callee-saved register is modified, and before an
// END is encountered.
//
// Most tests should call START, rather than call RESET directly.
#define RESET() \
  __ Reset();   \
  simulator.ResetState();

#define START_AFTER_RESET()      \
  __ SetStackPointer(csp);       \
  __ PushCalleeSavedRegisters(); \
  __ Debug("Start test.", __LINE__, TRACE_ENABLE | LOG_ALL);

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Executes the generated code, starting at the beginning of 'buf'.
#define RUN() simulator.RunFrom(reinterpret_cast<Instruction*>(buf))

// Emits the test epilogue: dump the registers into 'core', restore the
// callee-saved registers and return.
#define END()                                               \
  __ Debug("End test.", __LINE__, TRACE_DISABLE | LOG_ALL); \
  core.Dump(&masm);                                         \
  __ PopCalleeSavedRegisters();                             \
  __ Ret();                                                 \
  __ GetCode(nullptr);

// Releases the buffer allocated by SETUP_SIZE.
#define TEARDOWN() delete[] buf;

#else  // ifdef USE_SIMULATOR.
// Run the test on real hardware or models.
//
// As above, SETUP_SIZE declares the locals used by the other macros; here the
// buffer comes from OS::Allocate and 'actual_size' records the size obtained.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != nullptr);                                   \
  HandleScope scope(isolate);                                  \
  size_t actual_size;                                          \
  byte* buf = static_cast<byte*>(                              \
      v8::base::OS::Allocate(buf_size, &actual_size, true));   \
  MacroAssembler masm(isolate, buf, actual_size,               \
                      v8::internal::CodeObjectRequired::kYes); \
  RegisterDump core;

#define RESET()                                                \
  __ Reset();                                                  \
  /* Reset the machine state (like simulator.ResetState()). */ \
  __ Msr(NZCV, xzr);                                           \
  __ Msr(FPCR, xzr);

#define START_AFTER_RESET() \
  __ SetStackPointer(csp);  \
  __ PushCalleeSavedRegisters();

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Flushes the instruction cache for the generated code, then calls 'buf' as a
// function taking no arguments. memcpy is used to turn the data pointer into
// a function pointer.
#define RUN()                                                       \
  Assembler::FlushICache(isolate, buf, masm.SizeOfGeneratedCode()); \
  {                                                                 \
    void (*test_function)(void);                                    \
    memcpy(&test_function, &buf, sizeof(buf));                      \
    test_function();                                                \
  }

#define END()                   \
  core.Dump(&masm);             \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  __ GetCode(nullptr);

#define TEARDOWN() v8::base::OS::Free(buf, actual_size);

#endif  // ifdef USE_SIMULATOR.
| 181 |
// Assertion helpers. Each compares an expected value against state captured
// in the RegisterDump named 'core', which SETUP() declares.
#define CHECK_EQUAL_NZCV(expected) CHECK(EqualNzcv(expected, core.flags_nzcv()))

// 'expected' is parenthesised so that an expression argument still has its
// address taken as a whole.
#define CHECK_EQUAL_REGISTERS(expected) \
  CHECK(EqualRegisters(&(expected), &core))

#define CHECK_EQUAL_32(expected, result) \
  CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))

#define CHECK_EQUAL_FP32(expected, result) \
  CHECK(EqualFP32(expected, &core, result))

#define CHECK_EQUAL_64(expected, result) CHECK(Equal64(expected, &core, result))

#define CHECK_EQUAL_FP64(expected, result) \
  CHECK(EqualFP64(expected, &core, result))

// The literal pool size is only checked in DEBUG builds; otherwise the macro
// expands to a no-op expression.
#ifdef DEBUG
#define CHECK_LITERAL_POOL_SIZE(expected) \
  CHECK((expected) == (__ LiteralPoolSize()))
#else
#define CHECK_LITERAL_POOL_SIZE(expected) ((void)0)
#endif
| 203 |
// Upper bound on the number of mismatching vectors that each test prints in
// detail; any further mismatches are only counted.
static constexpr unsigned kErrorReportLimit = 8;
| 206 |
| 207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd, |
| 208 const VRegister& vn); |
| 209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd, |
| 210 const VRegister& vn, |
| 211 const VRegister& vm); |
| 212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd, |
| 213 const VRegister& vn, |
| 214 const VRegister& vm, |
| 215 int vm_index); |
| 216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)( |
| 217 const VRegister& vd, int imm1, const VRegister& vn, int imm2); |
| 218 |
| 219 // This helps using the same typename for both the function pointer |
| 220 // and the array of immediates passed to helper routines. |
| 221 template <typename T> |
| 222 class Test2OpImmediateNEONHelper_t { |
| 223 public: |
| 224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd, |
| 225 const VRegister& vn, T imm); |
| 226 }; |
| 227 |
| 228 namespace { |
| 229 |
// Returns how many hex digits are needed to print a value of the wider of the
// two template types: two digits for every byte.
template <typename Ta, typename Tb>
unsigned MaxHexCharCount() {
  const unsigned widest_bytes =
      static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb)));
  const unsigned hex_digits_per_byte = 8 / 4;
  return widest_bytes * hex_digits_per_byte;
}
| 237 |
| 238 // ==== Tests for instructions of the form <INST> VReg, VReg. ==== |
| 239 |
| 240 void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, |
| 241 unsigned inputs_n_length, uintptr_t results, |
| 242 VectorFormat vd_form, VectorFormat vn_form) { |
| 243 DCHECK_NE(vd_form, kFormatUndefined); |
| 244 DCHECK_NE(vn_form, kFormatUndefined); |
| 245 |
| 246 SETUP(); |
| 247 START(); |
| 248 |
| 249 // Roll up the loop to keep the code size down. |
| 250 Label loop_n; |
| 251 |
| 252 Register out = x0; |
| 253 Register inputs_n_base = x1; |
| 254 Register inputs_n_last_16bytes = x3; |
| 255 Register index_n = x5; |
| 256 |
| 257 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); |
| 258 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 259 |
| 260 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); |
| 261 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 262 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 263 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); |
| 264 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); |
| 265 |
| 266 // These will be either a D- or a Q-register form, with a single lane |
| 267 // (for use in scalar load and store operations). |
| 268 VRegister vd = VRegister::Create(0, vd_bits); |
| 269 VRegister vn = v1.V16B(); |
| 270 VRegister vntmp = v3.V16B(); |
| 271 |
| 272 // These will have the correct format for use when calling 'helper'. |
| 273 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); |
| 274 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); |
| 275 |
| 276 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. |
| 277 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); |
| 278 |
| 279 __ Mov(out, results); |
| 280 |
| 281 __ Mov(inputs_n_base, inputs_n); |
| 282 __ Mov(inputs_n_last_16bytes, |
| 283 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); |
| 284 |
| 285 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); |
| 286 |
| 287 __ Mov(index_n, 0); |
| 288 __ Bind(&loop_n); |
| 289 |
| 290 __ Ldr(vntmp_single, |
| 291 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); |
| 292 __ Ext(vn, vn, vntmp, vn_lane_bytes); |
| 293 |
| 294 // Set the destination to zero. |
| 295 |
| 296 // TODO(all): Setting the destination to values other than zero might be a |
| 297 // better test for instructions such as sqxtn2 which may leave parts of V |
| 298 // registers unchanged. |
| 299 __ Movi(vd.V16B(), 0); |
| 300 |
| 301 (masm.*helper)(vd_helper, vn_helper); |
| 302 |
| 303 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); |
| 304 |
| 305 __ Add(index_n, index_n, 1); |
| 306 __ Cmp(index_n, inputs_n_length); |
| 307 __ B(lo, &loop_n); |
| 308 |
| 309 END(); |
| 310 RUN(); |
| 311 TEARDOWN(); |
| 312 } |
| 313 |
| 314 // Test NEON instructions. The inputs_*[] and expected[] arrays should be |
| 315 // arrays of rawbit representation of input values. This ensures that |
| 316 // exact bit comparisons can be performed. |
| 317 template <typename Td, typename Tn> |
| 318 void Test1OpNEON(const char* name, Test1OpNEONHelper_t helper, |
| 319 const Tn inputs_n[], unsigned inputs_n_length, |
| 320 const Td expected[], unsigned expected_length, |
| 321 VectorFormat vd_form, VectorFormat vn_form) { |
| 322 DCHECK_GT(inputs_n_length, 0U); |
| 323 |
| 324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 325 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 326 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 327 |
| 328 const unsigned results_length = inputs_n_length; |
| 329 std::vector<Td> results(results_length * vd_lane_count, 0); |
| 330 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); |
| 331 |
| 332 Test1OpNEON_Helper( |
| 333 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, |
| 334 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form); |
| 335 |
| 336 // Check the results. |
| 337 CHECK(expected_length == results_length); |
| 338 unsigned error_count = 0; |
| 339 unsigned d = 0; |
| 340 const char* padding = " "; |
| 341 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); |
| 342 for (unsigned n = 0; n < inputs_n_length; n++, d++) { |
| 343 bool error_in_vector = false; |
| 344 |
| 345 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 346 unsigned output_index = (n * vd_lane_count) + lane; |
| 347 |
| 348 if (results[output_index] != expected[output_index]) { |
| 349 error_in_vector = true; |
| 350 break; |
| 351 } |
| 352 } |
| 353 |
| 354 if (error_in_vector && (++error_count <= kErrorReportLimit)) { |
| 355 printf("%s\n", name); |
| 356 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, |
| 357 lane_len_in_hex + 1, padding); |
| 358 |
| 359 const unsigned first_index_n = |
| 360 inputs_n_length - (16 / vn_lane_bytes) + n + 1; |
| 361 |
| 362 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); |
| 363 lane++) { |
| 364 unsigned output_index = (n * vd_lane_count) + lane; |
| 365 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; |
| 366 |
| 367 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 |
| 368 " " |
| 369 "| 0x%0*" PRIx64 "\n", |
| 370 results[output_index] != expected[output_index] ? '*' : ' ', |
| 371 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), |
| 372 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), |
| 373 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); |
| 374 } |
| 375 } |
| 376 } |
| 377 DCHECK_EQ(d, expected_length); |
| 378 if (error_count > kErrorReportLimit) { |
| 379 printf("%u other errors follow.\n", error_count - kErrorReportLimit); |
| 380 } |
| 381 DCHECK_EQ(error_count, 0U); |
| 382 } |
| 383 |
| 384 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ==== |
| 385 // where <V> is one of B, H, S or D registers. |
| 386 // e.g. saddlv H1, v0.8B |
| 387 |
| 388 // TODO(all): Change tests to store all lanes of the resulting V register. |
| 389 // Some tests store all 128 bits of the resulting V register to |
| 390 // check the simulator's behaviour on the rest of the register. |
| 391 // This is better than storing the affected lanes only. |
| 392 // Change any tests such as the 'Across' template to do the same. |
| 393 |
| 394 void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, |
| 395 unsigned inputs_n_length, uintptr_t results, |
| 396 VectorFormat vd_form, VectorFormat vn_form) { |
| 397 DCHECK_NE(vd_form, kFormatUndefined); |
| 398 DCHECK_NE(vn_form, kFormatUndefined); |
| 399 |
| 400 SETUP(); |
| 401 START(); |
| 402 |
| 403 // Roll up the loop to keep the code size down. |
| 404 Label loop_n; |
| 405 |
| 406 Register out = x0; |
| 407 Register inputs_n_base = x1; |
| 408 Register inputs_n_last_vector = x3; |
| 409 Register index_n = x5; |
| 410 |
| 411 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); |
| 412 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); |
| 413 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 414 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 415 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); |
| 416 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); |
| 417 |
| 418 // Test destructive operations by (arbitrarily) using the same register for |
| 419 // B and S lane sizes. |
| 420 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize); |
| 421 |
| 422 // These will be either a D- or a Q-register form, with a single lane |
| 423 // (for use in scalar load and store operations). |
| 424 // Create two aliases for v8; the first is the destination for the tested |
| 425 // instruction, the second, the whole Q register to check the results. |
| 426 VRegister vd = VRegister::Create(0, vd_bits); |
| 427 VRegister vdstr = VRegister::Create(0, kQRegSizeInBits); |
| 428 |
| 429 VRegister vn = VRegister::Create(1, vn_bits); |
| 430 VRegister vntmp = VRegister::Create(3, vn_bits); |
| 431 |
| 432 // These will have the correct format for use when calling 'helper'. |
| 433 VRegister vd_helper = VRegister::Create(0, vn_bits, vn_lane_count); |
| 434 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); |
| 435 |
| 436 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. |
| 437 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); |
| 438 |
| 439 // Same registers for use in the 'ext' instructions. |
| 440 VRegister vn_ext = (kDRegSizeInBits == vn_bits) ? vn.V8B() : vn.V16B(); |
| 441 VRegister vntmp_ext = |
| 442 (kDRegSizeInBits == vn_bits) ? vntmp.V8B() : vntmp.V16B(); |
| 443 |
| 444 __ Mov(out, results); |
| 445 |
| 446 __ Mov(inputs_n_base, inputs_n); |
| 447 __ Mov(inputs_n_last_vector, |
| 448 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); |
| 449 |
| 450 __ Ldr(vn, MemOperand(inputs_n_last_vector)); |
| 451 |
| 452 __ Mov(index_n, 0); |
| 453 __ Bind(&loop_n); |
| 454 |
| 455 __ Ldr(vntmp_single, |
| 456 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); |
| 457 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); |
| 458 |
| 459 if (destructive) { |
| 460 __ Mov(vd_helper, vn_helper); |
| 461 (masm.*helper)(vd, vd_helper); |
| 462 } else { |
| 463 (masm.*helper)(vd, vn_helper); |
| 464 } |
| 465 |
| 466 __ Str(vdstr, MemOperand(out, kQRegSize, PostIndex)); |
| 467 |
| 468 __ Add(index_n, index_n, 1); |
| 469 __ Cmp(index_n, inputs_n_length); |
| 470 __ B(lo, &loop_n); |
| 471 |
| 472 END(); |
| 473 RUN(); |
| 474 TEARDOWN(); |
| 475 } |
| 476 |
| 477 // Test NEON instructions. The inputs_*[] and expected[] arrays should be |
| 478 // arrays of rawbit representation of input values. This ensures that |
| 479 // exact bit comparisons can be performed. |
| 480 template <typename Td, typename Tn> |
| 481 void Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper, |
| 482 const Tn inputs_n[], unsigned inputs_n_length, |
| 483 const Td expected[], unsigned expected_length, |
| 484 VectorFormat vd_form, VectorFormat vn_form) { |
| 485 DCHECK_GT(inputs_n_length, 0U); |
| 486 |
| 487 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 488 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form); |
| 489 |
| 490 const unsigned results_length = inputs_n_length; |
| 491 std::vector<Td> results(results_length * vd_lanes_per_q, 0); |
| 492 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); |
| 493 |
| 494 Test1OpAcrossNEON_Helper( |
| 495 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, |
| 496 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form); |
| 497 |
| 498 // Check the results. |
| 499 DCHECK_EQ(expected_length, results_length); |
| 500 unsigned error_count = 0; |
| 501 unsigned d = 0; |
| 502 const char* padding = " "; |
| 503 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); |
| 504 for (unsigned n = 0; n < inputs_n_length; n++, d++) { |
| 505 bool error_in_vector = false; |
| 506 |
| 507 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 508 unsigned expected_index = (n * vd_lane_count) + lane; |
| 509 unsigned results_index = (n * vd_lanes_per_q) + lane; |
| 510 |
| 511 if (results[results_index] != expected[expected_index]) { |
| 512 error_in_vector = true; |
| 513 break; |
| 514 } |
| 515 |
| 516 // For across operations, the remaining lanes should be zero. |
| 517 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) { |
| 518 unsigned results_index = (n * vd_lanes_per_q) + lane; |
| 519 if (results[results_index] != 0) { |
| 520 error_in_vector = true; |
| 521 break; |
| 522 } |
| 523 } |
| 524 } |
| 525 |
| 526 if (error_in_vector && (++error_count <= kErrorReportLimit)) { |
| 527 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 528 |
| 529 printf("%s\n", name); |
| 530 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, |
| 531 lane_len_in_hex + 1, padding); |
| 532 |
| 533 for (unsigned lane = 0; lane < vn_lane_count; lane++) { |
| 534 unsigned results_index = |
| 535 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); |
| 536 unsigned input_index_n = |
| 537 (inputs_n_length - vn_lane_count + n + 1 + lane) % inputs_n_length; |
| 538 |
| 539 Td expect = 0; |
| 540 if ((vn_lane_count - 1) == lane) { |
| 541 // This is the last lane to be printed, ie. the least-significant |
| 542 // lane, so use the expected value; any other lane should be zero. |
| 543 unsigned expected_index = n * vd_lane_count; |
| 544 expect = expected[expected_index]; |
| 545 } |
| 546 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", |
| 547 results[results_index] != expect ? '*' : ' ', lane_len_in_hex, |
| 548 static_cast<uint64_t>(inputs_n[input_index_n]), lane_len_in_hex, |
| 549 static_cast<uint64_t>(results[results_index]), lane_len_in_hex, |
| 550 static_cast<uint64_t>(expect)); |
| 551 } |
| 552 } |
| 553 } |
| 554 DCHECK_EQ(d, expected_length); |
| 555 if (error_count > kErrorReportLimit) { |
| 556 printf("%u other errors follow.\n", error_count - kErrorReportLimit); |
| 557 } |
| 558 DCHECK_EQ(error_count, 0U); |
| 559 } |
| 560 |
| 561 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ==== |
| 562 |
| 563 void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d, |
| 564 uintptr_t inputs_n, unsigned inputs_n_length, |
| 565 uintptr_t inputs_m, unsigned inputs_m_length, |
| 566 uintptr_t results, VectorFormat vd_form, |
| 567 VectorFormat vn_form, VectorFormat vm_form) { |
| 568 DCHECK_NE(vd_form, kFormatUndefined); |
| 569 DCHECK_NE(vn_form, kFormatUndefined); |
| 570 DCHECK_NE(vm_form, kFormatUndefined); |
| 571 |
| 572 SETUP(); |
| 573 START(); |
| 574 |
| 575 // Roll up the loop to keep the code size down. |
| 576 Label loop_n, loop_m; |
| 577 |
| 578 Register out = x0; |
| 579 Register inputs_n_base = x1; |
| 580 Register inputs_m_base = x2; |
| 581 Register inputs_d_base = x3; |
| 582 Register inputs_n_last_16bytes = x4; |
| 583 Register inputs_m_last_16bytes = x5; |
| 584 Register index_n = x6; |
| 585 Register index_m = x7; |
| 586 |
| 587 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); |
| 588 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 589 |
| 590 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); |
| 591 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 592 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 593 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); |
| 594 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); |
| 595 |
| 596 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); |
| 597 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); |
| 598 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); |
| 599 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); |
| 600 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); |
| 601 |
| 602 // Always load and store 128 bits regardless of the format. |
| 603 VRegister vd = v0.V16B(); |
| 604 VRegister vn = v1.V16B(); |
| 605 VRegister vm = v2.V16B(); |
| 606 VRegister vntmp = v3.V16B(); |
| 607 VRegister vmtmp = v4.V16B(); |
| 608 VRegister vres = v5.V16B(); |
| 609 |
| 610 // These will have the correct format for calling the 'helper'. |
| 611 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); |
| 612 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); |
| 613 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); |
| 614 |
| 615 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. |
| 616 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); |
| 617 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); |
| 618 |
| 619 __ Mov(out, results); |
| 620 |
| 621 __ Mov(inputs_d_base, inputs_d); |
| 622 |
| 623 __ Mov(inputs_n_base, inputs_n); |
| 624 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); |
| 625 __ Mov(inputs_m_base, inputs_m); |
| 626 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); |
| 627 |
| 628 __ Ldr(vd, MemOperand(inputs_d_base)); |
| 629 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); |
| 630 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); |
| 631 |
| 632 __ Mov(index_n, 0); |
| 633 __ Bind(&loop_n); |
| 634 |
| 635 __ Ldr(vntmp_single, |
| 636 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); |
| 637 __ Ext(vn, vn, vntmp, vn_lane_bytes); |
| 638 |
| 639 __ Mov(index_m, 0); |
| 640 __ Bind(&loop_m); |
| 641 |
| 642 __ Ldr(vmtmp_single, |
| 643 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); |
| 644 __ Ext(vm, vm, vmtmp, vm_lane_bytes); |
| 645 |
| 646 __ Mov(vres, vd); |
| 647 |
| 648 (masm.*helper)(vres_helper, vn_helper, vm_helper); |
| 649 |
| 650 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); |
| 651 |
| 652 __ Add(index_m, index_m, 1); |
| 653 __ Cmp(index_m, inputs_m_length); |
| 654 __ B(lo, &loop_m); |
| 655 |
| 656 __ Add(index_n, index_n, 1); |
| 657 __ Cmp(index_n, inputs_n_length); |
| 658 __ B(lo, &loop_n); |
| 659 |
| 660 END(); |
| 661 RUN(); |
| 662 TEARDOWN(); |
| 663 } |
| 664 |
| 665 // Test NEON instructions. The inputs_*[] and expected[] arrays should be |
| 666 // arrays of rawbit representation of input values. This ensures that |
| 667 // exact bit comparisons can be performed. |
| 668 template <typename Td, typename Tn, typename Tm> |
| 669 void Test2OpNEON(const char* name, Test2OpNEONHelper_t helper, |
| 670 const Td inputs_d[], const Tn inputs_n[], |
| 671 unsigned inputs_n_length, const Tm inputs_m[], |
| 672 unsigned inputs_m_length, const Td expected[], |
| 673 unsigned expected_length, VectorFormat vd_form, |
| 674 VectorFormat vn_form, VectorFormat vm_form) { |
| 675 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); |
| 676 |
| 677 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); |
| 678 |
| 679 const unsigned results_length = inputs_n_length * inputs_m_length; |
| 680 std::vector<Td> results(results_length * vd_lane_count); |
| 681 const unsigned lane_len_in_hex = |
| 682 static_cast<unsigned>(std::max(sizeof(Td), sizeof(Tm)) * 8) / 4; |
| 683 |
| 684 Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d), |
| 685 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, |
| 686 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, |
| 687 reinterpret_cast<uintptr_t>(results.data()), vd_form, |
| 688 vn_form, vm_form); |
| 689 |
| 690 // Check the results. |
| 691 CHECK(expected_length == results_length); |
| 692 unsigned error_count = 0; |
| 693 unsigned d = 0; |
| 694 const char* padding = " "; |
| 695 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); |
| 696 for (unsigned n = 0; n < inputs_n_length; n++) { |
| 697 for (unsigned m = 0; m < inputs_m_length; m++, d++) { |
| 698 bool error_in_vector = false; |
| 699 |
| 700 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 701 unsigned output_index = |
| 702 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane; |
| 703 |
| 704 if (results[output_index] != expected[output_index]) { |
| 705 error_in_vector = true; |
| 706 break; |
| 707 } |
| 708 } |
| 709 |
| 710 if (error_in_vector && (++error_count <= kErrorReportLimit)) { |
| 711 printf("%s\n", name); |
| 712 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n", |
| 713 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, |
| 714 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); |
| 715 |
| 716 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 717 unsigned output_index = (n * inputs_m_length * vd_lane_count) + |
| 718 (m * vd_lane_count) + lane; |
| 719 unsigned input_index_n = |
| 720 (inputs_n_length - vd_lane_count + n + 1 + lane) % |
| 721 inputs_n_length; |
| 722 unsigned input_index_m = |
| 723 (inputs_m_length - vd_lane_count + m + 1 + lane) % |
| 724 inputs_m_length; |
| 725 |
| 726 printf( |
| 727 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 |
| 728 " " |
| 729 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", |
| 730 results[output_index] != expected[output_index] ? '*' : ' ', |
| 731 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), |
| 732 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), |
| 733 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), |
| 734 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), |
| 735 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); |
| 736 } |
| 737 } |
| 738 } |
| 739 } |
| 740 DCHECK_EQ(d, expected_length); |
| 741 if (error_count > kErrorReportLimit) { |
| 742 printf("%u other errors follow.\n", error_count - kErrorReportLimit); |
| 743 } |
| 744 DCHECK_EQ(error_count, 0U); |
| 745 } |
| 746 |
| 747 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ==== |
| 748 |
| 749 void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, |
| 750 uintptr_t inputs_d, uintptr_t inputs_n, |
| 751 unsigned inputs_n_length, uintptr_t inputs_m, |
| 752 unsigned inputs_m_length, const int indices[], |
| 753 unsigned indices_length, uintptr_t results, |
| 754 VectorFormat vd_form, VectorFormat vn_form, |
| 755 VectorFormat vm_form) { |
| 756 DCHECK_NE(vd_form, kFormatUndefined); |
| 757 DCHECK_NE(vn_form, kFormatUndefined); |
| 758 DCHECK_NE(vm_form, kFormatUndefined); |
| 759 |
| 760 SETUP(); |
| 761 START(); |
| 762 |
| 763 // Roll up the loop to keep the code size down. |
| 764 Label loop_n, loop_m; |
| 765 |
| 766 Register out = x0; |
| 767 Register inputs_n_base = x1; |
| 768 Register inputs_m_base = x2; |
| 769 Register inputs_d_base = x3; |
| 770 Register inputs_n_last_16bytes = x4; |
| 771 Register inputs_m_last_16bytes = x5; |
| 772 Register index_n = x6; |
| 773 Register index_m = x7; |
| 774 |
| 775 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); |
| 776 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 777 |
| 778 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); |
| 779 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 780 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 781 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); |
| 782 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); |
| 783 |
| 784 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); |
| 785 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); |
| 786 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); |
| 787 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); |
| 788 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); |
| 789 |
| 790 // Always load and store 128 bits regardless of the format. |
| 791 VRegister vd = v0.V16B(); |
| 792 VRegister vn = v1.V16B(); |
| 793 VRegister vm = v2.V16B(); |
| 794 VRegister vntmp = v3.V16B(); |
| 795 VRegister vmtmp = v4.V16B(); |
| 796 VRegister vres = v5.V16B(); |
| 797 |
| 798 // These will have the correct format for calling the 'helper'. |
| 799 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); |
| 800 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); |
| 801 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); |
| 802 |
| 803 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. |
| 804 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); |
| 805 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); |
| 806 |
| 807 __ Mov(out, results); |
| 808 |
| 809 __ Mov(inputs_d_base, inputs_d); |
| 810 |
| 811 __ Mov(inputs_n_base, inputs_n); |
| 812 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); |
| 813 __ Mov(inputs_m_base, inputs_m); |
| 814 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); |
| 815 |
| 816 __ Ldr(vd, MemOperand(inputs_d_base)); |
| 817 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); |
| 818 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); |
| 819 |
| 820 __ Mov(index_n, 0); |
| 821 __ Bind(&loop_n); |
| 822 |
| 823 __ Ldr(vntmp_single, |
| 824 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); |
| 825 __ Ext(vn, vn, vntmp, vn_lane_bytes); |
| 826 |
| 827 __ Mov(index_m, 0); |
| 828 __ Bind(&loop_m); |
| 829 |
| 830 __ Ldr(vmtmp_single, |
| 831 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); |
| 832 __ Ext(vm, vm, vmtmp, vm_lane_bytes); |
| 833 |
| 834 __ Mov(vres, vd); |
| 835 { |
| 836 for (unsigned i = 0; i < indices_length; i++) { |
| 837 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]); |
| 838 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); |
| 839 } |
| 840 } |
| 841 |
| 842 __ Add(index_m, index_m, 1); |
| 843 __ Cmp(index_m, inputs_m_length); |
| 844 __ B(lo, &loop_m); |
| 845 |
| 846 __ Add(index_n, index_n, 1); |
| 847 __ Cmp(index_n, inputs_n_length); |
| 848 __ B(lo, &loop_n); |
| 849 |
| 850 END(); |
| 851 RUN(); |
| 852 TEARDOWN(); |
| 853 } |
| 854 |
| 855 // Test NEON instructions. The inputs_*[] and expected[] arrays should be |
| 856 // arrays of rawbit representation of input values. This ensures that |
| 857 // exact bit comparisons can be performed. |
| 858 template <typename Td, typename Tn, typename Tm> |
| 859 void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper, |
| 860 const Td inputs_d[], const Tn inputs_n[], |
| 861 unsigned inputs_n_length, const Tm inputs_m[], |
| 862 unsigned inputs_m_length, const int indices[], |
| 863 unsigned indices_length, const Td expected[], |
| 864 unsigned expected_length, VectorFormat vd_form, |
| 865 VectorFormat vn_form, VectorFormat vm_form) { |
| 866 DCHECK_GT(inputs_n_length, 0U); |
| 867 DCHECK_GT(inputs_m_length, 0U); |
| 868 DCHECK_GT(indices_length, 0U); |
| 869 |
| 870 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); |
| 871 |
| 872 const unsigned results_length = |
| 873 inputs_n_length * inputs_m_length * indices_length; |
| 874 std::vector<Td> results(results_length * vd_lane_count, 0); |
| 875 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>(); |
| 876 |
| 877 TestByElementNEON_Helper( |
| 878 helper, reinterpret_cast<uintptr_t>(inputs_d), |
| 879 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, |
| 880 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, indices, |
| 881 indices_length, reinterpret_cast<uintptr_t>(results.data()), vd_form, |
| 882 vn_form, vm_form); |
| 883 |
| 884 // Check the results. |
| 885 CHECK(expected_length == results_length); |
| 886 unsigned error_count = 0; |
| 887 unsigned d = 0; |
| 888 const char* padding = " "; |
| 889 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); |
| 890 for (unsigned n = 0; n < inputs_n_length; n++) { |
| 891 for (unsigned m = 0; m < inputs_m_length; m++) { |
| 892 for (unsigned index = 0; index < indices_length; index++, d++) { |
| 893 bool error_in_vector = false; |
| 894 |
| 895 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 896 unsigned output_index = |
| 897 (n * inputs_m_length * indices_length * vd_lane_count) + |
| 898 (m * indices_length * vd_lane_count) + (index * vd_lane_count) + |
| 899 lane; |
| 900 |
| 901 if (results[output_index] != expected[output_index]) { |
| 902 error_in_vector = true; |
| 903 break; |
| 904 } |
| 905 } |
| 906 |
| 907 if (error_in_vector && (++error_count <= kErrorReportLimit)) { |
| 908 printf("%s\n", name); |
| 909 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n", |
| 910 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, |
| 911 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); |
| 912 |
| 913 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 914 unsigned output_index = |
| 915 (n * inputs_m_length * indices_length * vd_lane_count) + |
| 916 (m * indices_length * vd_lane_count) + (index * vd_lane_count) + |
| 917 lane; |
| 918 unsigned input_index_n = |
| 919 (inputs_n_length - vd_lane_count + n + 1 + lane) % |
| 920 inputs_n_length; |
| 921 unsigned input_index_m = |
| 922 (inputs_m_length - vd_lane_count + m + 1 + lane) % |
| 923 inputs_m_length; |
| 924 |
| 925 printf( |
| 926 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 |
| 927 " " |
| 928 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", |
| 929 results[output_index] != expected[output_index] ? '*' : ' ', |
| 930 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), |
| 931 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), |
| 932 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), |
| 933 indices[index], lane_len_in_hex, |
| 934 static_cast<uint64_t>(results[output_index]), lane_len_in_hex, |
| 935 static_cast<uint64_t>(expected[output_index])); |
| 936 } |
| 937 } |
| 938 } |
| 939 } |
| 940 } |
| 941 DCHECK_EQ(d, expected_length); |
| 942 if (error_count > kErrorReportLimit) { |
| 943 printf("%u other errors follow.\n", error_count - kErrorReportLimit); |
| 944 } |
| 945 CHECK(error_count == 0); |
| 946 } |
| 947 |
| 948 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ==== |
| 949 |
| 950 template <typename Tm> |
| 951 void Test2OpImmNEON_Helper( |
| 952 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, |
| 953 uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[], |
| 954 unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, |
| 955 VectorFormat vn_form) { |
| 956 DCHECK(vd_form != kFormatUndefined && vn_form != kFormatUndefined); |
| 957 |
| 958 SETUP(); |
| 959 START(); |
| 960 |
| 961 // Roll up the loop to keep the code size down. |
| 962 Label loop_n; |
| 963 |
| 964 Register out = x0; |
| 965 Register inputs_n_base = x1; |
| 966 Register inputs_n_last_16bytes = x3; |
| 967 Register index_n = x5; |
| 968 |
| 969 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); |
| 970 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 971 |
| 972 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); |
| 973 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 974 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 975 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); |
| 976 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); |
| 977 |
| 978 // These will be either a D- or a Q-register form, with a single lane |
| 979 // (for use in scalar load and store operations). |
| 980 VRegister vd = VRegister::Create(0, vd_bits); |
| 981 VRegister vn = v1.V16B(); |
| 982 VRegister vntmp = v3.V16B(); |
| 983 |
| 984 // These will have the correct format for use when calling 'helper'. |
| 985 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); |
| 986 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); |
| 987 |
| 988 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. |
| 989 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); |
| 990 |
| 991 __ Mov(out, results); |
| 992 |
| 993 __ Mov(inputs_n_base, inputs_n); |
| 994 __ Mov(inputs_n_last_16bytes, |
| 995 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); |
| 996 |
| 997 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); |
| 998 |
| 999 __ Mov(index_n, 0); |
| 1000 __ Bind(&loop_n); |
| 1001 |
| 1002 __ Ldr(vntmp_single, |
| 1003 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); |
| 1004 __ Ext(vn, vn, vntmp, vn_lane_bytes); |
| 1005 |
| 1006 // Set the destination to zero for tests such as '[r]shrn2'. |
| 1007 // TODO(all): Setting the destination to values other than zero might be a |
| 1008 // better test for shift and accumulate instructions (srsra/ssra/usra/ursra). |
| 1009 __ Movi(vd.V16B(), 0); |
| 1010 |
| 1011 { |
| 1012 for (unsigned i = 0; i < inputs_m_length; i++) { |
| 1013 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]); |
| 1014 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); |
| 1015 } |
| 1016 } |
| 1017 |
| 1018 __ Add(index_n, index_n, 1); |
| 1019 __ Cmp(index_n, inputs_n_length); |
| 1020 __ B(lo, &loop_n); |
| 1021 |
| 1022 END(); |
| 1023 RUN(); |
| 1024 TEARDOWN(); |
| 1025 } |
| 1026 |
| 1027 // Test NEON instructions. The inputs_*[] and expected[] arrays should be |
| 1028 // arrays of rawbit representation of input values. This ensures that |
| 1029 // exact bit comparisons can be performed. |
| 1030 template <typename Td, typename Tn, typename Tm> |
| 1031 void Test2OpImmNEON(const char* name, |
| 1032 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, |
| 1033 const Tn inputs_n[], unsigned inputs_n_length, |
| 1034 const Tm inputs_m[], unsigned inputs_m_length, |
| 1035 const Td expected[], unsigned expected_length, |
| 1036 VectorFormat vd_form, VectorFormat vn_form) { |
| 1037 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); |
| 1038 |
| 1039 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 1040 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 1041 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 1042 |
| 1043 const unsigned results_length = inputs_n_length * inputs_m_length; |
| 1044 std::vector<Td> results(results_length * vd_lane_count, 0); |
| 1045 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); |
| 1046 |
| 1047 Test2OpImmNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n), |
| 1048 inputs_n_length, inputs_m, inputs_m_length, |
| 1049 reinterpret_cast<uintptr_t>(results.data()), vd_form, |
| 1050 vn_form); |
| 1051 |
| 1052 // Check the results. |
| 1053 CHECK(expected_length == results_length); |
| 1054 unsigned error_count = 0; |
| 1055 unsigned d = 0; |
| 1056 const char* padding = " "; |
| 1057 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); |
| 1058 for (unsigned n = 0; n < inputs_n_length; n++) { |
| 1059 for (unsigned m = 0; m < inputs_m_length; m++, d++) { |
| 1060 bool error_in_vector = false; |
| 1061 |
| 1062 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 1063 unsigned output_index = |
| 1064 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane; |
| 1065 |
| 1066 if (results[output_index] != expected[output_index]) { |
| 1067 error_in_vector = true; |
| 1068 break; |
| 1069 } |
| 1070 } |
| 1071 |
| 1072 if (error_in_vector && (++error_count <= kErrorReportLimit)) { |
| 1073 printf("%s\n", name); |
| 1074 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, |
| 1075 padding, lane_len_in_hex, padding, lane_len_in_hex + 1, padding); |
| 1076 |
| 1077 const unsigned first_index_n = |
| 1078 inputs_n_length - (16 / vn_lane_bytes) + n + 1; |
| 1079 |
| 1080 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); |
| 1081 lane++) { |
| 1082 unsigned output_index = (n * inputs_m_length * vd_lane_count) + |
| 1083 (m * vd_lane_count) + lane; |
| 1084 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; |
| 1085 unsigned input_index_m = m; |
| 1086 |
| 1087 printf( |
| 1088 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 |
| 1089 " " |
| 1090 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", |
| 1091 results[output_index] != expected[output_index] ? '*' : ' ', |
| 1092 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), |
| 1093 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), |
| 1094 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), |
| 1095 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); |
| 1096 } |
| 1097 } |
| 1098 } |
| 1099 } |
| 1100 DCHECK_EQ(d, expected_length); |
| 1101 if (error_count > kErrorReportLimit) { |
| 1102 printf("%u other errors follow.\n", error_count - kErrorReportLimit); |
| 1103 } |
| 1104 CHECK(error_count == 0); |
| 1105 } |
| 1106 |
| 1107 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ==== |
| 1108 |
| 1109 void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, |
| 1110 uintptr_t inputs_d, const int inputs_imm1[], |
| 1111 unsigned inputs_imm1_length, uintptr_t inputs_n, |
| 1112 unsigned inputs_n_length, |
| 1113 const int inputs_imm2[], |
| 1114 unsigned inputs_imm2_length, uintptr_t results, |
| 1115 VectorFormat vd_form, VectorFormat vn_form) { |
| 1116 DCHECK_NE(vd_form, kFormatUndefined); |
| 1117 DCHECK_NE(vn_form, kFormatUndefined); |
| 1118 |
| 1119 SETUP(); |
| 1120 START(); |
| 1121 |
| 1122 // Roll up the loop to keep the code size down. |
| 1123 Label loop_n; |
| 1124 |
| 1125 Register out = x0; |
| 1126 Register inputs_d_base = x1; |
| 1127 Register inputs_n_base = x2; |
| 1128 Register inputs_n_last_vector = x4; |
| 1129 Register index_n = x6; |
| 1130 |
| 1131 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); |
| 1132 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 1133 |
| 1134 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); |
| 1135 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); |
| 1136 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); |
| 1137 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); |
| 1138 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); |
| 1139 |
| 1140 // These will be either a D- or a Q-register form, with a single lane |
| 1141 // (for use in scalar load and store operations). |
| 1142 VRegister vd = VRegister::Create(0, vd_bits); |
| 1143 VRegister vn = VRegister::Create(1, vn_bits); |
| 1144 VRegister vntmp = VRegister::Create(4, vn_bits); |
| 1145 VRegister vres = VRegister::Create(5, vn_bits); |
| 1146 |
| 1147 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); |
| 1148 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); |
| 1149 |
| 1150 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. |
| 1151 VRegister vntmp_single = VRegister::Create(4, vn_lane_bits); |
| 1152 |
| 1153 // Same registers for use in the 'ext' instructions. |
| 1154 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B(); |
| 1155 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B(); |
| 1156 |
| 1157 __ Mov(out, results); |
| 1158 |
| 1159 __ Mov(inputs_d_base, inputs_d); |
| 1160 |
| 1161 __ Mov(inputs_n_base, inputs_n); |
| 1162 __ Mov(inputs_n_last_vector, |
| 1163 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); |
| 1164 |
| 1165 __ Ldr(vd, MemOperand(inputs_d_base)); |
| 1166 |
| 1167 __ Ldr(vn, MemOperand(inputs_n_last_vector)); |
| 1168 |
| 1169 __ Mov(index_n, 0); |
| 1170 __ Bind(&loop_n); |
| 1171 |
| 1172 __ Ldr(vntmp_single, |
| 1173 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); |
| 1174 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); |
| 1175 |
| 1176 for (unsigned i = 0; i < inputs_imm1_length; i++) { |
| 1177 for (unsigned j = 0; j < inputs_imm2_length; j++) { |
| 1178 __ Mov(vres, vd); |
| 1179 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]); |
| 1180 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); |
| 1181 } |
| 1182 } |
| 1183 |
| 1184 __ Add(index_n, index_n, 1); |
| 1185 __ Cmp(index_n, inputs_n_length); |
| 1186 __ B(lo, &loop_n); |
| 1187 |
| 1188 END(); |
| 1189 RUN(); |
| 1190 TEARDOWN(); |
| 1191 } |
| 1192 |
| 1193 // Test NEON instructions. The inputs_*[] and expected[] arrays should be |
| 1194 // arrays of rawbit representation of input values. This ensures that |
| 1195 // exact bit comparisons can be performed. |
| 1196 template <typename Td, typename Tn> |
| 1197 void TestOpImmOpImmNEON(const char* name, |
| 1198 TestOpImmOpImmVdUpdateNEONHelper_t helper, |
| 1199 const Td inputs_d[], const int inputs_imm1[], |
| 1200 unsigned inputs_imm1_length, const Tn inputs_n[], |
| 1201 unsigned inputs_n_length, const int inputs_imm2[], |
| 1202 unsigned inputs_imm2_length, const Td expected[], |
| 1203 unsigned expected_length, VectorFormat vd_form, |
| 1204 VectorFormat vn_form) { |
| 1205 DCHECK_GT(inputs_n_length, 0U); |
| 1206 DCHECK_GT(inputs_imm1_length, 0U); |
| 1207 DCHECK_GT(inputs_imm2_length, 0U); |
| 1208 |
| 1209 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); |
| 1210 |
| 1211 const unsigned results_length = |
| 1212 inputs_n_length * inputs_imm1_length * inputs_imm2_length; |
| 1213 |
| 1214 std::vector<Td> results(results_length * vd_lane_count, 0); |
| 1215 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); |
| 1216 |
| 1217 TestOpImmOpImmNEON_Helper( |
| 1218 helper, reinterpret_cast<uintptr_t>(inputs_d), inputs_imm1, |
| 1219 inputs_imm1_length, reinterpret_cast<uintptr_t>(inputs_n), |
| 1220 inputs_n_length, inputs_imm2, inputs_imm2_length, |
| 1221 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form); |
| 1222 |
| 1223 // Check the results. |
| 1224 CHECK(expected_length == results_length); |
| 1225 unsigned error_count = 0; |
| 1226 unsigned counted_length = 0; |
| 1227 const char* padding = " "; |
| 1228 DCHECK(strlen(padding) >= (lane_len_in_hex + 1)); |
| 1229 for (unsigned n = 0; n < inputs_n_length; n++) { |
| 1230 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) { |
| 1231 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) { |
| 1232 bool error_in_vector = false; |
| 1233 |
| 1234 counted_length++; |
| 1235 |
| 1236 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 1237 unsigned output_index = |
| 1238 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + |
| 1239 (imm1 * inputs_imm2_length * vd_lane_count) + |
| 1240 (imm2 * vd_lane_count) + lane; |
| 1241 |
| 1242 if (results[output_index] != expected[output_index]) { |
| 1243 error_in_vector = true; |
| 1244 break; |
| 1245 } |
| 1246 } |
| 1247 |
| 1248 if (error_in_vector && (++error_count <= kErrorReportLimit)) { |
| 1249 printf("%s\n", name); |
| 1250 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", |
| 1251 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, |
| 1252 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, |
| 1253 lane_len_in_hex + 1, padding); |
| 1254 |
| 1255 for (unsigned lane = 0; lane < vd_lane_count; lane++) { |
| 1256 unsigned output_index = |
| 1257 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + |
| 1258 (imm1 * inputs_imm2_length * vd_lane_count) + |
| 1259 (imm2 * vd_lane_count) + lane; |
| 1260 unsigned input_index_n = |
| 1261 (inputs_n_length - vd_lane_count + n + 1 + lane) % |
| 1262 inputs_n_length; |
| 1263 unsigned input_index_imm1 = imm1; |
| 1264 unsigned input_index_imm2 = imm2; |
| 1265 |
| 1266 printf( |
| 1267 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 |
| 1268 " " |
| 1269 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", |
| 1270 results[output_index] != expected[output_index] ? '*' : ' ', |
| 1271 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), |
| 1272 lane_len_in_hex, |
| 1273 static_cast<uint64_t>(inputs_imm1[input_index_imm1]), |
| 1274 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), |
| 1275 lane_len_in_hex, |
| 1276 static_cast<uint64_t>(inputs_imm2[input_index_imm2]), |
| 1277 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), |
| 1278 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); |
| 1279 } |
| 1280 } |
| 1281 } |
| 1282 } |
| 1283 } |
| 1284 DCHECK_EQ(counted_length, expected_length); |
| 1285 if (error_count > kErrorReportLimit) { |
| 1286 printf("%u other errors follow.\n", error_count - kErrorReportLimit); |
| 1287 } |
| 1288 CHECK(error_count == 0); |
| 1289 } |
| 1290 |
| 1291 } // anonymous namespace |
| 1292 |
| 1293 // ==== NEON Tests. ==== |
| 1294 |
| 1295 // clang-format off |
| 1296 |
// Calls Test1OpNEON for MacroAssembler::insn. The test is labelled
// "<insn>_<vd_fmt>" and checked against the kExpected_NEON_<insn>_<vd_fmt>
// trace. 'n_values' must be an array (not a pointer), because its length is
// taken with sizeof.
#define CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, n_values) \
  Test1OpNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
      &MacroAssembler::insn, \
      n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), \
      kExpected_NEON_##insn##_##vd_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt)
| 1304 |
// Calls Test1OpAcrossNEON for MacroAssembler::insn. The test is labelled
// "<insn>_<vd_fmt>_<vn_fmt>" and checked against the
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt> trace. 'n_values' must be an array
// (not a pointer), because its length is taken with sizeof.
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, n_values) \
  Test1OpAcrossNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt), \
      &MacroAssembler::insn, \
      n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt)
| 1313 |
// Calls Test2OpNEON for MacroAssembler::insn. The test is labelled
// "<insn>_<vd_fmt>" and checked against the kExpected_NEON_<insn>_<vd_fmt>
// trace. 'n_values' and 'm_values' must be arrays (not pointers), because
// their lengths are taken with sizeof.
#define CALL_TEST_NEON_HELPER_2Op(insn, vd_fmt, vn_fmt, vm_fmt, d_values, \
                                  n_values, m_values) \
  Test2OpNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
      &MacroAssembler::insn, \
      d_values, \
      n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), \
      m_values, \
      (sizeof(m_values) / sizeof(m_values[0])), \
      kExpected_NEON_##insn##_##vd_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt, \
      kFormat##vm_fmt)
| 1323 |
// Calls Test2OpImmNEON for MacroAssembler::insn. The test is labelled
// "<insn>_<vd_fmt>_2OPIMM" and checked against the
// kExpected_NEON_<insn>_<vd_fmt>_2OPIMM trace. 'n_values' and 'imm_values'
// must be arrays (not pointers), because their lengths are taken with sizeof.
#define CALL_TEST_NEON_HELPER_2OpImm(insn, vd_fmt, vn_fmt, n_values, \
                                     imm_values) \
  Test2OpImmNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_2OPIMM", \
      &MacroAssembler::insn, \
      n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), \
      imm_values, \
      (sizeof(imm_values) / sizeof(imm_values[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_2OPIMM, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_2OPIMM, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt)
| 1333 |
// Calls TestByElementNEON for MacroAssembler::insn. The test is labelled
// "<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt>" and checked against the
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt> trace. 'n_values',
// 'm_values' and 'lane_indices' must be arrays (not pointers), because their
// lengths are taken with sizeof.
#define CALL_TEST_NEON_HELPER_ByElement(insn, vd_fmt, vn_fmt, vm_fmt, \
                                        d_values, n_values, m_values, \
                                        lane_indices) \
  TestByElementNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt) "_" \
      STRINGIFY(vm_fmt), \
      &MacroAssembler::insn, \
      d_values, \
      n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), \
      m_values, \
      (sizeof(m_values) / sizeof(m_values[0])), \
      lane_indices, \
      (sizeof(lane_indices) / sizeof(lane_indices[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt, \
      kFormat##vm_fmt)
| 1346 |
// Calls TestOpImmOpImmNEON with 'emit_fn' as the instruction emitter. 'insn'
// is only used for naming: the test is labelled "<insn>_<vd_fmt>" and checked
// against the kExpected_NEON_<insn>_<vd_fmt> trace. 'imm1_values', 'n_values'
// and 'imm2_values' must be arrays (not pointers), because their lengths are
// taken with sizeof.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(emit_fn, insn, vd_fmt, vn_fmt, \
                                         d_values, imm1_values, n_values, \
                                         imm2_values) \
  TestOpImmOpImmNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
      emit_fn, \
      d_values, \
      imm1_values, \
      (sizeof(imm1_values) / sizeof(imm1_values[0])), \
      n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), \
      imm2_values, \
      (sizeof(imm2_values) / sizeof(imm2_values[0])), \
      kExpected_NEON_##insn##_##vd_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt)
| 1358 |
// "2SAME" tests: one source operand, with the destination and the source
// using the same format.
#define CALL_TEST_NEON_HELPER_2SAME(insn, fmt, values) \
  CALL_TEST_NEON_HELPER_1Op(insn, fmt, fmt, values)

// Each DEFINE_TEST_NEON_2SAME_* macro below expands to one SIMTEST per listed
// format, named <insn>_<format>. 'dataset' is the suffix pasted onto
// kInput<N>bits to select the input array matching the lane size.

// 8B and 16B.
#define DEFINE_TEST_NEON_2SAME_8B_16B(insn, dataset) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8B, kInput8bits##dataset); \
  } \
  SIMTEST(insn##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 16B, kInput8bits##dataset); \
  }

// 4H and 8H.
#define DEFINE_TEST_NEON_2SAME_4H_8H(insn, dataset) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8H, kInput16bits##dataset); \
  }

// 2S and 4S.
#define DEFINE_TEST_NEON_2SAME_2S_4S(insn, dataset) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInput32bits##dataset); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInput32bits##dataset); \
  }

// 8B, 16B, 4H and 8H.
#define DEFINE_TEST_NEON_2SAME_BH(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_8B_16B(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_4H_8H(insn, dataset)

// Every format listed above; 2D is left out.
#define DEFINE_TEST_NEON_2SAME_NO2D(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_BH(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, dataset)

// Every format listed above, plus 2D.
#define DEFINE_TEST_NEON_2SAME(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_NO2D(insn, dataset) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##dataset); \
  }

// 2S, 4S and 2D.
#define DEFINE_TEST_NEON_2SAME_SD(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, dataset) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##dataset); \
  }
| 1404 |
// 2SAME tests for the 2S, 4S and 2D formats. The S formats read from
// kInputFloat<dataset> and the D format from kInputDouble<dataset>.
#define DEFINE_TEST_NEON_2SAME_FP(insn, dataset) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInputFloat##dataset); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInputFloat##dataset); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInputDouble##dataset); \
  }

// 2SAME tests for the scalar S and D formats, using the same
// kInputFloat<dataset> / kInputDouble<dataset> arrays.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(insn, dataset) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInputFloat##dataset); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInputDouble##dataset); \
  }
| 1423 |
// 2SAME tests for the scalar formats. Each single-format macro expands to one
// SIMTEST named <insn>_<format> that reads the kInput<N>bits<dataset> array
// matching the scalar size.

// B.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, dataset) \
  SIMTEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, B, kInput8bits##dataset); \
  }

// H.
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, dataset) \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, H, kInput16bits##dataset); \
  }

// S.
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, dataset) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInput32bits##dataset); \
  }

// D.
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, dataset) \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInput64bits##dataset); \
  }

// B, H, S and D.
#define DEFINE_TEST_NEON_2SAME_SCALAR(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, dataset)

// S and D only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, dataset) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, dataset)
| 1450 |
// "ACROSS" tests: a scalar destination format paired with a vector source
// format. Test names are <insn>_<vd>_<vn>.
#define CALL_TEST_NEON_HELPER_ACROSS(insn, vd_fmt, vn_fmt, values) \
  CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, values)

// Destination scalar has the same lane size as the source vector.
#define DEFINE_TEST_NEON_ACROSS(insn, dataset) \
  SIMTEST(insn##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 8B, kInput8bits##dataset); \
  } \
  SIMTEST(insn##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 16B, kInput8bits##dataset); \
  } \
  SIMTEST(insn##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 4H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInput32bits##dataset); \
  }

// Destination scalar is twice as wide as the source lanes.
#define DEFINE_TEST_NEON_ACROSS_LONG(insn, dataset) \
  SIMTEST(insn##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8B, kInput8bits##dataset); \
  } \
  SIMTEST(insn##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 16B, kInput8bits##dataset); \
  } \
  SIMTEST(insn##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 8H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, D, 4S, kInput32bits##dataset); \
  }

// S destination from a 4S source, reading kInputFloat<dataset>.
#define DEFINE_TEST_NEON_ACROSS_FP(insn, dataset) \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInputFloat##dataset); \
  }
| 1492 |
// "2DIFF" tests: one source operand, with the destination and the source
// using different formats. Test names carry the destination format only.
#define CALL_TEST_NEON_HELPER_2DIFF(insn, vd_fmt, vn_fmt, values) \
  CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, values)

// Destination lanes are twice as wide as the source lanes.
#define DEFINE_TEST_NEON_2DIFF_LONG(insn, dataset) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 8B, kInput8bits##dataset); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8H, 16B, kInput8bits##dataset); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 4H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 8H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 1D, 2S, kInput32bits##dataset); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 4S, kInput32bits##dataset); \
  }

// Destination lanes are half as wide as the source lanes. The last three
// tests exercise the instruction named <insn>2.
#define DEFINE_TEST_NEON_2DIFF_NARROW(insn, dataset) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8B, 8H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInput32bits##dataset); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInput64bits##dataset); \
  } \
  SIMTEST(insn##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 16B, 8H, kInput16bits##dataset); \
  } \
  SIMTEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInput32bits##dataset); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInput64bits##dataset); \
  }
| 1535 |
| 1536 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ |
| 1537 SIMTEST(mnemonic##_4S) { \ |
| 1538 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ |
| 1539 } \ |
| 1540 SIMTEST(mnemonic##_2D) { \ |
| 1541 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ |
| 1542 } \ |
| 1543 SIMTEST(mnemonic##2_4S) { \ |
| 1544 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \ |
| 1545 } \ |
| 1546 SIMTEST(mnemonic##2_2D) { \ |
| 1547 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ |
| 1548 } |
| 1549 |
// Defines four SIMTESTs: <mnemonic>_4H and <mnemonic>_2S, plus <mnemonic>2_8H
// and <mnemonic>2_4S for the `mnemonic`2 form. The H-format tests read
// kInputFloat<input> (paired with 4S); the S-format tests read
// kInputDouble<input> (paired with 2D).
| 1550 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ |
| 1551 SIMTEST(mnemonic##_4H) { \ |
| 1552 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ |
| 1553 } \ |
| 1554 SIMTEST(mnemonic##_2S) { \ |
| 1555 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ |
| 1556 } \ |
| 1557 SIMTEST(mnemonic##2_8H) { \ |
| 1558 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ |
| 1559 } \ |
| 1560 SIMTEST(mnemonic##2_4S) { \ |
| 1561 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ |
| 1562 } |
| 1563 |
// Subset of the FP narrowing tests that only covers the 2D-paired formats:
// defines <mnemonic>_2S and <mnemonic>2_4S, both reading kInputDouble<input>.
| 1564 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ |
| 1565 SIMTEST(mnemonic##_2S) { \ |
| 1566 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ |
| 1567 } \ |
| 1568 SIMTEST(mnemonic##2_4S) { \ |
| 1569 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ |
| 1570 } |
| 1571 |
// Scalar counterpart of the narrowing tests: defines <mnemonic>_B, _H and _S,
// pairing each scalar format with the next wider one (B/H, H/S, S/D) and the
// kInput{16,32,64}bits<input> array matching the wider format.
| 1572 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ |
| 1573 SIMTEST(mnemonic##_B) { \ |
| 1574 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ |
| 1575 } \ |
| 1576 SIMTEST(mnemonic##_H) { \ |
| 1577 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ |
| 1578 } \ |
| 1579 SIMTEST(mnemonic##_S) { \ |
| 1580 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ |
| 1581 } |
| 1582 |
// Defines <mnemonic>_S (formats S and 2S, kInputFloat<input>) and
// <mnemonic>_D (formats D and 2D, kInputDouble<input>): a scalar format
// paired with a two-lane vector format of the same element size.
| 1583 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ |
| 1584 SIMTEST(mnemonic##_S) { \ |
| 1585 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ |
| 1586 } \ |
| 1587 SIMTEST(mnemonic##_D) { \ |
| 1588 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ |
| 1589 } |
| 1590 |
// Convenience wrapper around CALL_TEST_NEON_HELPER_2Op for instructions whose
// three operands share one format: `variant` is passed for all three format
// arguments and `input_nm` is passed for both source-input arguments.
| 1591 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \ |
| 1592 { \ |
| 1593 CALL_TEST_NEON_HELPER_2Op(mnemonic, variant, variant, variant, input_d, \ |
| 1594 input_nm, input_nm); \ |
| 1595 } |
| 1596 |
// Defines <mnemonic>_8B and <mnemonic>_16B three-same tests. Both use
// kInput8bitsAccDestination as the destination input and kInput8bits<input>
// for the sources.
| 1597 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ |
| 1598 SIMTEST(mnemonic##_8B) { \ |
| 1599 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, kInput8bitsAccDestination, \ |
| 1600 kInput8bits##input); \ |
| 1601 } \ |
| 1602 SIMTEST(mnemonic##_16B) { \ |
| 1603 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, kInput8bitsAccDestination, \ |
| 1604 kInput8bits##input); \ |
| 1605 } |
| 1606 |
// Defines three-same tests for the halfword and word formats only:
// <mnemonic>_4H, _8H (16-bit inputs) and <mnemonic>_2S, _4S (32-bit inputs).
// Each uses the kInput<N>bitsAccDestination array as the destination input.
| 1607 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ |
| 1608 SIMTEST(mnemonic##_4H) { \ |
| 1609 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInput16bitsAccDestination, \ |
| 1610 kInput16bits##input); \ |
| 1611 } \ |
| 1612 SIMTEST(mnemonic##_8H) { \ |
| 1613 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInput16bitsAccDestination, \ |
| 1614 kInput16bits##input); \ |
| 1615 } \ |
| 1616 SIMTEST(mnemonic##_2S) { \ |
| 1617 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInput32bitsAccDestination, \ |
| 1618 kInput32bits##input); \ |
| 1619 } \ |
| 1620 SIMTEST(mnemonic##_4S) { \ |
| 1621 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInput32bitsAccDestination, \ |
| 1622 kInput32bits##input); \ |
| 1623 } |
| 1624 |
// Every integer three-same format except 2D: expands to the 8B/16B tests
// followed by the 4H/8H/2S/4S tests.
| 1625 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ |
| 1626 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ |
| 1627 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) |
| 1628 |
// Full set of integer three-same tests: everything from
// DEFINE_TEST_NEON_3SAME_NO2D plus <mnemonic>_2D, which uses the 64-bit
// input arrays.
| 1629 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ |
| 1630 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ |
| 1631 SIMTEST(mnemonic##_2D) { \ |
| 1632 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInput64bitsAccDestination, \ |
| 1633 kInput64bits##input); \ |
| 1634 } |
| 1635 |
// Floating-point three-same tests: <mnemonic>_2S and _4S read
// kInputFloat<input> with kInputFloatAccDestination; <mnemonic>_2D reads
// kInputDouble<input> with kInputDoubleAccDestination.
| 1636 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ |
| 1637 SIMTEST(mnemonic##_2S) { \ |
| 1638 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInputFloatAccDestination, \ |
| 1639 kInputFloat##input); \ |
| 1640 } \ |
| 1641 SIMTEST(mnemonic##_4S) { \ |
| 1642 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInputFloatAccDestination, \ |
| 1643 kInputFloat##input); \ |
| 1644 } \ |
| 1645 SIMTEST(mnemonic##_2D) { \ |
| 1646 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInputDoubleAccDestination, \ |
| 1647 kInputDouble##input); \ |
| 1648 } |
| 1649 |
// Defines only the D-format scalar three-same test, <mnemonic>_D, using the
// 64-bit input arrays.
| 1650 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ |
| 1651 SIMTEST(mnemonic##_D) { \ |
| 1652 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \ |
| 1653 kInput64bits##input); \ |
| 1654 } |
| 1655 |
// Defines the H- and S-format scalar three-same tests, <mnemonic>_H (16-bit
// inputs) and <mnemonic>_S (32-bit inputs).
| 1656 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ |
| 1657 SIMTEST(mnemonic##_H) { \ |
| 1658 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \ |
| 1659 kInput16bits##input); \ |
| 1660 } \ |
| 1661 SIMTEST(mnemonic##_S) { \ |
| 1662 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \ |
| 1663 kInput32bits##input); \ |
| 1664 } |
| 1665 |
// Defines scalar three-same tests for all four integer formats:
// <mnemonic>_B, _H, _S and _D, each with the input arrays of matching width.
// The H/S/D tests are spelled out here rather than reusing the _SCALAR_HS and
// _SCALAR_D macros, so do not combine this macro with those for one mnemonic
// (the test names would collide).
| 1666 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ |
| 1667 SIMTEST(mnemonic##_B) { \ |
| 1668 CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, kInput8bitsAccDestination, \ |
| 1669 kInput8bits##input); \ |
| 1670 } \ |
| 1671 SIMTEST(mnemonic##_H) { \ |
| 1672 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \ |
| 1673 kInput16bits##input); \ |
| 1674 } \ |
| 1675 SIMTEST(mnemonic##_S) { \ |
| 1676 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \ |
| 1677 kInput32bits##input); \ |
| 1678 } \ |
| 1679 SIMTEST(mnemonic##_D) { \ |
| 1680 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \ |
| 1681 kInput64bits##input); \ |
| 1682 } |
| 1683 |
// Floating-point scalar three-same tests: <mnemonic>_S reads
// kInputFloat<input>, <mnemonic>_D reads kInputDouble<input>, each with the
// corresponding ...AccDestination array as the destination input.
| 1684 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ |
| 1685 SIMTEST(mnemonic##_S) { \ |
| 1686 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInputFloatAccDestination, \ |
| 1687 kInputFloat##input); \ |
| 1688 } \ |
| 1689 SIMTEST(mnemonic##_D) { \ |
| 1690 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInputDoubleAccDestination, \ |
| 1691 kInputDouble##input); \ |
| 1692 } |
| 1693 |
// Pass-through to CALL_TEST_NEON_HELPER_2Op for instructions whose operands
// use different formats; all seven arguments are forwarded unchanged and in
// the same order.
| 1694 #define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, vdform, vnform, vmform, input_d, \ |
| 1695 input_n, input_m) \ |
| 1696 { \ |
| 1697 CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, input_d, \ |
| 1698 input_n, input_m); \ |
| 1699 } |
| 1700 |
// Defines <mnemonic>_8H (sources 8B) and <mnemonic>2_8H (sources 16B). Both
// use kInput16bitsAccDestination for the destination and kInput8bits<input>
// for both sources.
| 1701 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ |
| 1702 SIMTEST(mnemonic##_8H) { \ |
| 1703 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \ |
| 1704 kInput16bitsAccDestination, \ |
| 1705 kInput8bits##input, kInput8bits##input); \ |
| 1706 } \ |
| 1707 SIMTEST(mnemonic##2_8H) { \ |
| 1708 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \ |
| 1709 kInput16bitsAccDestination, \ |
| 1710 kInput8bits##input, kInput8bits##input); \ |
| 1711 } |
| 1712 |
// Defines <mnemonic>_4S (sources 4H) and <mnemonic>2_4S (sources 8H). Both
// use kInput32bitsAccDestination for the destination and kInput16bits<input>
// for both sources.
| 1713 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ |
| 1714 SIMTEST(mnemonic##_4S) { \ |
| 1715 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \ |
| 1716 kInput32bitsAccDestination, \ |
| 1717 kInput16bits##input, kInput16bits##input); \ |
| 1718 } \ |
| 1719 SIMTEST(mnemonic##2_4S) { \ |
| 1720 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \ |
| 1721 kInput32bitsAccDestination, \ |
| 1722 kInput16bits##input, kInput16bits##input); \ |
| 1723 } |
| 1724 |
// Defines <mnemonic>_2D (sources 2S) and <mnemonic>2_2D (sources 4S). Both
// use kInput64bitsAccDestination for the destination and kInput32bits<input>
// for both sources.
| 1725 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ |
| 1726 SIMTEST(mnemonic##_2D) { \ |
| 1727 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \ |
| 1728 kInput64bitsAccDestination, \ |
| 1729 kInput32bits##input, kInput32bits##input); \ |
| 1730 } \ |
| 1731 SIMTEST(mnemonic##2_2D) { \ |
| 1732 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \ |
| 1733 kInput64bitsAccDestination, \ |
| 1734 kInput32bits##input, kInput32bits##input); \ |
| 1735 } |
| 1736 |
// Long three-different tests for the 4S and 2D destinations only (no 8H).
| 1737 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ |
| 1738 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ |
| 1739 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) |
| 1740 |
// Long three-different tests for all three destinations: 8H, 4S and 2D, each
// in both the base and the `mnemonic`2 form.
| 1741 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ |
| 1742 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ |
| 1743 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ |
| 1744 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) |
| 1745 |
// Defines <mnemonic>_S: scalar S destination with H sources, using
// kInput32bitsAccDestination and kInput16bits<input>.
| 1746 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ |
| 1747 SIMTEST(mnemonic##_S) { \ |
| 1748 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, kInput32bitsAccDestination, \ |
| 1749 kInput16bits##input, kInput16bits##input); \ |
| 1750 } |
| 1751 |
// Defines <mnemonic>_D: scalar D destination with S sources, using
// kInput64bitsAccDestination and kInput32bits<input>.
| 1752 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ |
| 1753 SIMTEST(mnemonic##_D) { \ |
| 1754 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, kInput64bitsAccDestination, \ |
| 1755 kInput32bits##input, kInput32bits##input); \ |
| 1756 } |
| 1757 |
// Both scalar long tests: <mnemonic>_S followed by <mnemonic>_D.
| 1758 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ |
| 1759 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ |
| 1760 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) |
| 1761 |
// Defines six "wide" tests: <mnemonic>_8H/_4S/_2D and <mnemonic>2_8H/_4S/_2D.
// In each, the destination and first source share the wide format, while the
// second source uses elements half as wide (8B/4H/2S, or 16B/8H/4S for the
// `mnemonic`2 form). The first source reads the wide kInput<N>bits<input>
// array and the second the half-width one.
| 1762 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ |
| 1763 SIMTEST(mnemonic##_8H) { \ |
| 1764 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \ |
| 1765 kInput16bitsAccDestination, \ |
| 1766 kInput16bits##input, kInput8bits##input); \ |
| 1767 } \ |
| 1768 SIMTEST(mnemonic##_4S) { \ |
| 1769 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \ |
| 1770 kInput32bitsAccDestination, \ |
| 1771 kInput32bits##input, kInput16bits##input); \ |
| 1772 } \ |
| 1773 SIMTEST(mnemonic##_2D) { \ |
| 1774 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \ |
| 1775 kInput64bitsAccDestination, \ |
| 1776 kInput64bits##input, kInput32bits##input); \ |
| 1777 } \ |
| 1778 SIMTEST(mnemonic##2_8H) { \ |
| 1779 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \ |
| 1780 kInput16bitsAccDestination, \ |
| 1781 kInput16bits##input, kInput8bits##input); \ |
| 1782 } \ |
| 1783 SIMTEST(mnemonic##2_4S) { \ |
| 1784 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \ |
| 1785 kInput32bitsAccDestination, \ |
| 1786 kInput32bits##input, kInput16bits##input); \ |
| 1787 } \ |
| 1788 SIMTEST(mnemonic##2_2D) { \ |
| 1789 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \ |
| 1790 kInput64bitsAccDestination, \ |
| 1791 kInput64bits##input, kInput32bits##input); \ |
| 1792 } |
| 1793 |
// Defines six narrowing three-different tests: <mnemonic>_8B/_4H/_2S and
// <mnemonic>2_16B/_8H/_4S. Both sources use a format with elements twice as
// wide as the destination's and read the matching kInput<N>bits<input>
// array; the destination input is the narrower ...AccDestination array.
| 1794 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ |
| 1795 SIMTEST(mnemonic##_8B) { \ |
| 1796 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \ |
| 1797 kInput8bitsAccDestination, \ |
| 1798 kInput16bits##input, kInput16bits##input); \ |
| 1799 } \ |
| 1800 SIMTEST(mnemonic##_4H) { \ |
| 1801 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \ |
| 1802 kInput16bitsAccDestination, \ |
| 1803 kInput32bits##input, kInput32bits##input); \ |
| 1804 } \ |
| 1805 SIMTEST(mnemonic##_2S) { \ |
| 1806 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \ |
| 1807 kInput32bitsAccDestination, \ |
| 1808 kInput64bits##input, kInput64bits##input); \ |
| 1809 } \ |
| 1810 SIMTEST(mnemonic##2_16B) { \ |
| 1811 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \ |
| 1812 kInput8bitsAccDestination, \ |
| 1813 kInput16bits##input, kInput16bits##input); \ |
| 1814 } \ |
| 1815 SIMTEST(mnemonic##2_8H) { \ |
| 1816 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \ |
| 1817 kInput16bitsAccDestination, \ |
| 1818 kInput32bits##input, kInput32bits##input); \ |
| 1819 } \ |
| 1820 SIMTEST(mnemonic##2_4S) { \ |
| 1821 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \ |
| 1822 kInput32bitsAccDestination, \ |
| 1823 kInput64bits##input, kInput64bits##input); \ |
| 1824 } |
| 1825 |
// Upper-case alias that forwards all five arguments, unchanged, to
// CALL_TEST_NEON_HELPER_2OpImm. The expansion is a braced block, so a call
// site is well-formed with or without a trailing semicolon.
| 1826 #define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, vdform, vnform, input_n, \ |
| 1827 input_imm) \ |
| 1828 { \ |
| 1829 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, input_n, \ |
| 1830 input_imm); \ |
| 1831 } |
| 1832 |
// Defines operand-plus-immediate tests for all seven vector formats:
// <mnemonic>_{8B,16B,4H,8H,2S,4S,2D}_2OPIMM. Both format arguments are the
// same; each test reads kInput<N>bits<input> and the immediate list
// kInput<N>bitsImm<input_imm> for its element width N.
| 1833 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ |
| 1834 SIMTEST(mnemonic##_8B_2OPIMM) { \ |
| 1835 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, kInput8bits##input, \ |
| 1836 kInput8bitsImm##input_imm); \ |
| 1837 } \ |
| 1838 SIMTEST(mnemonic##_16B_2OPIMM) { \ |
| 1839 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, kInput8bits##input, \ |
| 1840 kInput8bitsImm##input_imm); \ |
| 1841 } \ |
| 1842 SIMTEST(mnemonic##_4H_2OPIMM) { \ |
| 1843 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, \ |
| 1844 kInput16bitsImm##input_imm); \ |
| 1845 } \ |
| 1846 SIMTEST(mnemonic##_8H_2OPIMM) { \ |
| 1847 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, \ |
| 1848 kInput16bitsImm##input_imm); \ |
| 1849 } \ |
| 1850 SIMTEST(mnemonic##_2S_2OPIMM) { \ |
| 1851 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \ |
| 1852 kInput32bitsImm##input_imm); \ |
| 1853 } \ |
| 1854 SIMTEST(mnemonic##_4S_2OPIMM) { \ |
| 1855 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \ |
| 1856 kInput32bitsImm##input_imm); \ |
| 1857 } \ |
| 1858 SIMTEST(mnemonic##_2D_2OPIMM) { \ |
| 1859 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \ |
| 1860 kInput64bitsImm##input_imm); \ |
| 1861 } |
| 1862 |
// Same seven test names as DEFINE_TEST_NEON_2OPIMM, but the second format
// argument is the scalar form of the element (B, H, S or D) instead of the
// vector format. Because the generated test names are identical, this macro
// and DEFINE_TEST_NEON_2OPIMM cannot both be used for one mnemonic.
| 1863 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ |
| 1864 SIMTEST(mnemonic##_8B_2OPIMM) { \ |
| 1865 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, kInput8bits##input, \ |
| 1866 kInput8bitsImm##input_imm); \ |
| 1867 } \ |
| 1868 SIMTEST(mnemonic##_16B_2OPIMM) { \ |
| 1869 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, kInput8bits##input, \ |
| 1870 kInput8bitsImm##input_imm); \ |
| 1871 } \ |
| 1872 SIMTEST(mnemonic##_4H_2OPIMM) { \ |
| 1873 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, kInput16bits##input, \ |
| 1874 kInput16bitsImm##input_imm); \ |
| 1875 } \ |
| 1876 SIMTEST(mnemonic##_8H_2OPIMM) { \ |
| 1877 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, kInput16bits##input, \ |
| 1878 kInput16bitsImm##input_imm); \ |
| 1879 } \ |
| 1880 SIMTEST(mnemonic##_2S_2OPIMM) { \ |
| 1881 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, kInput32bits##input, \ |
| 1882 kInput32bitsImm##input_imm); \ |
| 1883 } \ |
| 1884 SIMTEST(mnemonic##_4S_2OPIMM) { \ |
| 1885 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, kInput32bits##input, \ |
| 1886 kInput32bitsImm##input_imm); \ |
| 1887 } \ |
| 1888 SIMTEST(mnemonic##_2D_2OPIMM) { \ |
| 1889 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, kInput64bits##input, \ |
| 1890 kInput64bitsImm##input_imm); \ |
| 1891 } |
| 1892 |
// Narrowing operand-plus-immediate tests: <mnemonic>_{8B,4H,2S}_2OPIMM and
// <mnemonic>2_{16B,8H,4S}_2OPIMM. The operand array matches the wider second
// format (kInput{16,32,64}bits<input>), while the immediate list matches the
// narrower first format (kInput{8,16,32}bitsImm<input_imm>).
| 1893 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ |
| 1894 SIMTEST(mnemonic##_8B_2OPIMM) { \ |
| 1895 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H, kInput16bits##input, \ |
| 1896 kInput8bitsImm##input_imm); \ |
| 1897 } \ |
| 1898 SIMTEST(mnemonic##_4H_2OPIMM) { \ |
| 1899 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S, kInput32bits##input, \ |
| 1900 kInput16bitsImm##input_imm); \ |
| 1901 } \ |
| 1902 SIMTEST(mnemonic##_2S_2OPIMM) { \ |
| 1903 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D, kInput64bits##input, \ |
| 1904 kInput32bitsImm##input_imm); \ |
| 1905 } \ |
| 1906 SIMTEST(mnemonic##2_16B_2OPIMM) { \ |
| 1907 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H, kInput16bits##input, \ |
| 1908 kInput8bitsImm##input_imm); \ |
| 1909 } \ |
| 1910 SIMTEST(mnemonic##2_8H_2OPIMM) { \ |
| 1911 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S, kInput32bits##input, \ |
| 1912 kInput16bitsImm##input_imm); \ |
| 1913 } \ |
| 1914 SIMTEST(mnemonic##2_4S_2OPIMM) { \ |
| 1915 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D, kInput64bits##input, \ |
| 1916 kInput32bitsImm##input_imm); \ |
| 1917 } |
| 1918 |
// Scalar narrowing operand-plus-immediate tests: <mnemonic>_{B,H,S}_2OPIMM.
// The operand array matches the wider second format (H, S, D) and the
// immediate list matches the narrower first format (B, H, S).
| 1919 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ |
| 1920 SIMTEST(mnemonic##_B_2OPIMM) { \ |
| 1921 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H, kInput16bits##input, \ |
| 1922 kInput8bitsImm##input_imm); \ |
| 1923 } \ |
| 1924 SIMTEST(mnemonic##_H_2OPIMM) { \ |
| 1925 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S, kInput32bits##input, \ |
| 1926 kInput16bitsImm##input_imm); \ |
| 1927 } \ |
| 1928 SIMTEST(mnemonic##_S_2OPIMM) { \ |
| 1929 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D, kInput64bits##input, \ |
| 1930 kInput32bitsImm##input_imm); \ |
| 1931 } |
| 1932 |
// Defines <mnemonic>_{2S,4S,2D}_2OPIMM tests for floating-point operands. All
// three take their immediate list from kInputDoubleImm<input_imm>, including
// the single-precision 2S and 4S tests.
//
// NOTE(review): the 2S test pastes the literal token `Basic`
// (kInputFloat##Basic -> kInputFloatBasic) and therefore ignores `input`,
// unlike the 4S and 2D tests. Its helper call also has no trailing semicolon,
// which is harmless because the helper expands to a braced block. Confirm
// whether the hard-coded input is intentional before changing it: expected
// results stored elsewhere may have been produced with it -- TODO confirm.
| 1933 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ |
| 1934 SIMTEST(mnemonic##_2S_2OPIMM) { \ |
| 1935 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \ |
| 1936 kInputDoubleImm##input_imm) \ |
| 1937 } \ |
| 1938 SIMTEST(mnemonic##_4S_2OPIMM) { \ |
| 1939 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \ |
| 1940 kInputDoubleImm##input_imm); \ |
| 1941 } \ |
| 1942 SIMTEST(mnemonic##_2D_2OPIMM) { \ |
| 1943 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \ |
| 1944 kInputDoubleImm##input_imm); \ |
| 1945 } |
| 1946 |
// Defines <mnemonic>_{2S,4S,2D}_2OPIMM tests with floating-point operands and
// integer immediate lists (kInput32bitsImm<input_imm> for 2S/4S,
// kInput64bitsImm<input_imm> for 2D). The helper calls carry no trailing
// semicolon; the helper expands to a braced block, so none is needed.
//
// NOTE(review): the 2S test pastes the literal token `Basic`
// (kInputFloat##Basic -> kInputFloatBasic) and ignores `input`, while the 4S
// and 2D tests use `input`. Confirm whether this is intentional before
// changing it: expected results stored elsewhere may have been produced with
// the hard-coded input -- TODO confirm.
| 1947 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ |
| 1948 SIMTEST(mnemonic##_2S_2OPIMM) { \ |
| 1949 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \ |
| 1950 kInput32bitsImm##input_imm) \ |
| 1951 } \ |
| 1952 SIMTEST(mnemonic##_4S_2OPIMM) { \ |
| 1953 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \ |
| 1954 kInput32bitsImm##input_imm) \ |
| 1955 } \ |
| 1956 SIMTEST(mnemonic##_2D_2OPIMM) { \ |
| 1957 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \ |
| 1958 kInput64bitsImm##input_imm) \ |
| 1959 } |
| 1960 |
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_FP: defines
// <mnemonic>_S_2OPIMM (kInput32bitsImm<input_imm>) and <mnemonic>_D_2OPIMM
// (kInput64bitsImm<input_imm>).
//
// NOTE(review): the S test pastes the literal token `Basic`
// (kInputFloat##Basic -> kInputFloatBasic) and ignores `input`, while the D
// test uses kInputDouble<input>. Confirm whether this is intentional before
// changing it: expected results stored elsewhere may have been produced with
// the hard-coded input -- TODO confirm.
| 1961 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ |
| 1962 SIMTEST(mnemonic##_S_2OPIMM) { \ |
| 1963 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##Basic, \ |
| 1964 kInput32bitsImm##input_imm) \ |
| 1965 } \ |
| 1966 SIMTEST(mnemonic##_D_2OPIMM) { \ |
| 1967 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \ |
| 1968 kInput64bitsImm##input_imm) \ |
| 1969 } |
| 1970 |
// Integer operand-plus-immediate tests restricted to the word and doubleword
// formats: <mnemonic>_{2S,4S,2D}_2OPIMM, reading kInput{32,64}bits<input> and
// kInput{32,64}bitsImm<input_imm>.
| 1971 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ |
| 1972 SIMTEST(mnemonic##_2S_2OPIMM) { \ |
| 1973 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \ |
| 1974 kInput32bitsImm##input_imm); \ |
| 1975 } \ |
| 1976 SIMTEST(mnemonic##_4S_2OPIMM) { \ |
| 1977 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \ |
| 1978 kInput32bitsImm##input_imm); \ |
| 1979 } \ |
| 1980 SIMTEST(mnemonic##_2D_2OPIMM) { \ |
| 1981 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \ |
| 1982 kInput64bitsImm##input_imm); \ |
| 1983 } |
| 1984 |
// Defines only <mnemonic>_D_2OPIMM: scalar D format with kInput64bits<input>
// and kInput64bitsImm<input_imm>.
| 1985 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ |
| 1986 SIMTEST(mnemonic##_D_2OPIMM) { \ |
| 1987 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, \ |
| 1988 kInput64bitsImm##input_imm); \ |
| 1989 } |
| 1990 |
// Defines <mnemonic>_S_2OPIMM (32-bit inputs and immediates) and then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D to add <mnemonic>_D_2OPIMM.
| 1991 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \ |
| 1992 SIMTEST(mnemonic##_S_2OPIMM) { \ |
| 1993 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, \ |
| 1994 kInput32bitsImm##input_imm); \ |
| 1995 } \ |
| 1996 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) |
| 1997 |
// Defines only <mnemonic>_D_2OPIMM for a floating-point scalar: operand
// kInputDouble<input>, immediate list kInputDoubleImm<input_imm>.
| 1998 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ |
| 1999 SIMTEST(mnemonic##_D_2OPIMM) { \ |
| 2000 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \ |
| 2001 kInputDoubleImm##input_imm); \ |
| 2002 } |
| 2003 |
// Defines <mnemonic>_S_2OPIMM (operand kInputFloat<input>) and then expands
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D to add <mnemonic>_D_2OPIMM. Both tests
// take their immediates from kInputDoubleImm<input_imm>, including the
// single-precision S test.
| 2004 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \ |
| 2005 SIMTEST(mnemonic##_S_2OPIMM) { \ |
| 2006 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, \ |
| 2007 kInputDoubleImm##input_imm); \ |
| 2008 } \ |
| 2009 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) |
| 2010 |
// Scalar operand-plus-immediate tests for all four integer formats: defines
// <mnemonic>_B_2OPIMM and <mnemonic>_H_2OPIMM here, then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_SD for the S and D tests.
| 2011 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ |
| 2012 SIMTEST(mnemonic##_B_2OPIMM) { \ |
| 2013 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, \ |
| 2014 kInput8bitsImm##input_imm); \ |
| 2015 } \ |
| 2016 SIMTEST(mnemonic##_H_2OPIMM) { \ |
| 2017 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, \ |
| 2018 kInput16bitsImm##input_imm); \ |
| 2019 } \ |
| 2020 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) |
| 2021 |
// Lengthening operand-plus-immediate tests: <mnemonic>_{8H,4S,2D}_2OPIMM and
// <mnemonic>2_{8H,4S,2D}_2OPIMM. The second format has elements half as wide
// as the first; both the operand array and the immediate list are chosen by
// that narrower width (8, 16 or 32 bits).
| 2022 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ |
| 2023 SIMTEST(mnemonic##_8H_2OPIMM) { \ |
| 2024 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, \ |
| 2025 kInput8bitsImm##input_imm); \ |
| 2026 } \ |
| 2027 SIMTEST(mnemonic##_4S_2OPIMM) { \ |
| 2028 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, \ |
| 2029 kInput16bitsImm##input_imm); \ |
| 2030 } \ |
| 2031 SIMTEST(mnemonic##_2D_2OPIMM) { \ |
| 2032 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, \ |
| 2033 kInput32bitsImm##input_imm); \ |
| 2034 } \ |
| 2035 SIMTEST(mnemonic##2_8H_2OPIMM) { \ |
| 2036 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, \ |
| 2037 kInput8bitsImm##input_imm); \ |
| 2038 } \ |
| 2039 SIMTEST(mnemonic##2_4S_2OPIMM) { \ |
| 2040 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, \ |
| 2041 kInput16bitsImm##input_imm); \ |
| 2042 } \ |
| 2043 SIMTEST(mnemonic##2_2D_2OPIMM) { \ |
| 2044 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, \ |
| 2045 kInput32bitsImm##input_imm); \ |
| 2046 } |
| 2047 |
// Upper-case alias that forwards all eight arguments, unchanged, to
// CALL_TEST_NEON_HELPER_ByElement.
| 2048 #define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, vdform, vnform, vmform, \ |
| 2049 input_d, input_n, input_m, indices) \ |
| 2050 { \ |
| 2051 CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, input_d, \ |
| 2052 input_n, input_m, indices); \ |
| 2053 } |
| 2054 |
// Defines four by-element tests named <mnemonic>_<vd>_<vn>_<vm>:
// 4H_4H_H and 8H_8H_H (16-bit inputs, kInputHIndices), 2S_2S_S and 4S_4S_S
// (32-bit inputs, kInputSIndices). `input_d`, `input_n` and `input_m` are
// suffixes pasted onto kInput<N>bits to pick the three input arrays.
| 2055 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ |
| 2056 SIMTEST(mnemonic##_4H_4H_H) { \ |
| 2057 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2058 mnemonic, 4H, 4H, H, kInput16bits##input_d, kInput16bits##input_n, \ |
| 2059 kInput16bits##input_m, kInputHIndices); \ |
| 2060 } \ |
| 2061 SIMTEST(mnemonic##_8H_8H_H) { \ |
| 2062 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2063 mnemonic, 8H, 8H, H, kInput16bits##input_d, kInput16bits##input_n, \ |
| 2064 kInput16bits##input_m, kInputHIndices); \ |
| 2065 } \ |
| 2066 SIMTEST(mnemonic##_2S_2S_S) { \ |
| 2067 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2068 mnemonic, 2S, 2S, S, kInput32bits##input_d, kInput32bits##input_n, \ |
| 2069 kInput32bits##input_m, kInputSIndices); \ |
| 2070 } \ |
| 2071 SIMTEST(mnemonic##_4S_4S_S) { \ |
| 2072 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2073 mnemonic, 4S, 4S, S, kInput32bits##input_d, kInput32bits##input_n, \ |
| 2074 kInput32bits##input_m, kInputSIndices); \ |
| 2075 } |
| 2076 |
// Scalar by-element tests: <mnemonic>_H_H_H (16-bit inputs, kInputHIndices)
// and <mnemonic>_S_S_S (32-bit inputs, kInputSIndices).
| 2077 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \ |
| 2078 SIMTEST(mnemonic##_H_H_H) { \ |
| 2079 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInput16bits##input_d, \ |
| 2080 kInput16bits##input_n, \ |
| 2081 kInput16bits##input_m, kInputHIndices); \ |
| 2082 } \ |
| 2083 SIMTEST(mnemonic##_S_S_S) { \ |
| 2084 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInput32bits##input_d, \ |
| 2085 kInput32bits##input_n, \ |
| 2086 kInput32bits##input_m, kInputSIndices); \ |
| 2087 } |
| 2088 |
// Floating-point by-element tests: <mnemonic>_2S_2S_S and _4S_4S_S read the
// kInputFloat<...> arrays with kInputSIndices; <mnemonic>_2D_2D_D reads the
// kInputDouble<...> arrays with kInputDIndices.
| 2089 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ |
| 2090 SIMTEST(mnemonic##_2S_2S_S) { \ |
| 2091 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInputFloat##input_d, \ |
| 2092 kInputFloat##input_n, \ |
| 2093 kInputFloat##input_m, kInputSIndices); \ |
| 2094 } \ |
| 2095 SIMTEST(mnemonic##_4S_4S_S) { \ |
| 2096 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInputFloat##input_d, \ |
| 2097 kInputFloat##input_n, \ |
| 2098 kInputFloat##input_m, kInputSIndices); \ |
| 2099 } \ |
| 2100 SIMTEST(mnemonic##_2D_2D_D) { \ |
| 2101 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2102 mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, \ |
| 2103 kInputDouble##input_m, kInputDIndices); \ |
| 2104 } |
| 2105 |
// Scalar floating-point by-element tests: <mnemonic>_S_S_S (kInputFloat<...>
// arrays, kInputSIndices) and <mnemonic>_D_D_D (kInputDouble<...> arrays,
// kInputDIndices).
| 2106 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ |
| 2107 SIMTEST(mnemonic##_S_S_S) { \ |
| 2108 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInputFloat##inp_d, \ |
| 2109 kInputFloat##inp_n, kInputFloat##inp_m, \ |
| 2110 kInputSIndices); \ |
| 2111 } \ |
| 2112 SIMTEST(mnemonic##_D_D_D) { \ |
| 2113 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, kInputDouble##inp_d, \ |
| 2114 kInputDouble##inp_n, kInputDouble##inp_m, \ |
| 2115 kInputDIndices); \ |
| 2116 } |
| 2117 |
// By-element tests whose destination is wider than the sources:
// <mnemonic>_4S_4H_H and <mnemonic>2_4S_8H_H (32-bit destination input,
// 16-bit source inputs, kInputHIndices), <mnemonic>_2D_2S_S and
// <mnemonic>2_2D_4S_S (64-bit destination input, 32-bit source inputs,
// kInputSIndices).
| 2118 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ |
| 2119 SIMTEST(mnemonic##_4S_4H_H) { \ |
| 2120 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2121 mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, \ |
| 2122 kInput16bits##input_m, kInputHIndices); \ |
| 2123 } \ |
| 2124 SIMTEST(mnemonic##2_4S_8H_H) { \ |
| 2125 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2126 mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, \ |
| 2127 kInput16bits##input_m, kInputHIndices); \ |
| 2128 } \ |
| 2129 SIMTEST(mnemonic##_2D_2S_S) { \ |
| 2130 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2131 mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, \ |
| 2132 kInput32bits##input_m, kInputSIndices); \ |
| 2133 } \ |
| 2134 SIMTEST(mnemonic##2_2D_4S_S) { \ |
| 2135 CALL_TEST_NEON_HELPER_BYELEMENT( \ |
| 2136 mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, \ |
| 2137 kInput32bits##input_m, kInputSIndices); \ |
| 2138 } |
| 2139 |
// Scalar by-element tests with a wider destination: <mnemonic>_S_H_H (32-bit
// destination input, 16-bit sources, kInputHIndices) and <mnemonic>_D_S_S
// (64-bit destination input, 32-bit sources, kInputSIndices).
| 2140 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, \ |
| 2141 input_m) \ |
| 2142 SIMTEST(mnemonic##_S_H_H) { \ |
| 2143 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, kInput32bits##input_d, \ |
| 2144 kInput16bits##input_n, \ |
| 2145 kInput16bits##input_m, kInputHIndices); \ |
| 2146 } \ |
| 2147 SIMTEST(mnemonic##_D_S_S) { \ |
| 2148 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, kInput64bits##input_d, \ |
| 2149 kInput32bits##input_n, \ |
| 2150 kInput32bits##input_m, kInputSIndices); \ |
| 2151 } |
| 2152 |
// Wrapper around CALL_TEST_NEON_HELPER_OpImmOpImm for instructions taking two
// operands that each carry an immediate. It passes the MacroAssembler member
// pointer &MacroAssembler::<mnemonic> as well as the bare mnemonic token, and
// uses `variant` for both format arguments.
| 2153 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant, input_d, input_imm1, \ |
| 2154 input_n, input_imm2) \ |
| 2155 { \ |
| 2156 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, \ |
| 2157 variant, variant, input_d, input_imm1, \ |
| 2158 input_n, input_imm2); \ |
| 2159 } |
| 2160 |
// Defines four tests named by element size, <mnemonic>_B, _H, _S and _D.
// Each runs on the corresponding full-width vector format (16B, 8H, 4S, 2D)
// and pastes the four suffix arguments onto kInput<N>bits / kInput<N>bitsImm
// to pick the two operand arrays and the two immediate lists.
| 2161 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, \ |
| 2162 input_imm2) \ |
| 2163 SIMTEST(mnemonic##_B) { \ |
| 2164 CALL_TEST_NEON_HELPER_2OP2IMM( \ |
| 2165 mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, \ |
| 2166 kInput8bits##input_n, kInput8bitsImm##input_imm2); \ |
| 2167 } \ |
| 2168 SIMTEST(mnemonic##_H) { \ |
| 2169 CALL_TEST_NEON_HELPER_2OP2IMM( \ |
| 2170 mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, \ |
| 2171 kInput16bits##input_n, kInput16bitsImm##input_imm2); \ |
| 2172 } \ |
| 2173 SIMTEST(mnemonic##_S) { \ |
| 2174 CALL_TEST_NEON_HELPER_2OP2IMM( \ |
| 2175 mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, \ |
| 2176 kInput32bits##input_n, kInput32bitsImm##input_imm2); \ |
| 2177 } \ |
| 2178 SIMTEST(mnemonic##_D) { \ |
| 2179 CALL_TEST_NEON_HELPER_2OP2IMM( \ |
| 2180 mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, \ |
| 2181 kInput64bits##input_n, kInput64bitsImm##input_imm2); \ |
| 2182 } |
| 2183 |
| 2184 // clang-format on |
| 2185 |
| 2186 // Advanced SIMD copy. |
// `ins` gets the per-element-size tests ins_B/_H/_S/_D; vector `dup` gets
// dup_{8B,16B,4H,8H,2S,4S,2D}_2OPIMM. Both use the Basic inputs with the
// LaneCountFromZero immediate lists.
| 2187 DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic, |
| 2188 LaneCountFromZero) |
| 2189 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) |
| 2190 |
| 2191 // Advanced SIMD scalar copy. |
// Generates dup_B_2OPIMM, dup_H_2OPIMM, dup_S_2OPIMM and dup_D_2OPIMM.
| 2192 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) |
| 2193 |
| 2194 // Advanced SIMD three same. |
// One line per mnemonic, all using the Basic input set. The macro chosen
// selects which formats get a test: _3SAME covers 8B..2D, _NO2D omits 2D,
// _HS covers only 4H/8H/2S/4S, _8B_16B only the byte formats, and _FP the
// floating-point 2S/4S/2D formats.
// NOTE(review): `and_` carries a trailing underscore, presumably because
// `and` is a reserved alternative token in C++ -- confirm against the
// assembler's method name.
| 2195 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic) |
| 2196 DEFINE_TEST_NEON_3SAME(sqadd, Basic) |
| 2197 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic) |
| 2198 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic) |
| 2199 DEFINE_TEST_NEON_3SAME(sqsub, Basic) |
| 2200 DEFINE_TEST_NEON_3SAME(cmgt, Basic) |
| 2201 DEFINE_TEST_NEON_3SAME(cmge, Basic) |
| 2202 DEFINE_TEST_NEON_3SAME(sshl, Basic) |
| 2203 DEFINE_TEST_NEON_3SAME(sqshl, Basic) |
| 2204 DEFINE_TEST_NEON_3SAME(srshl, Basic) |
| 2205 DEFINE_TEST_NEON_3SAME(sqrshl, Basic) |
| 2206 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic) |
| 2207 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic) |
| 2208 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic) |
| 2209 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic) |
| 2210 DEFINE_TEST_NEON_3SAME(add, Basic) |
| 2211 DEFINE_TEST_NEON_3SAME(cmtst, Basic) |
| 2212 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic) |
| 2213 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic) |
| 2214 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic) |
| 2215 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic) |
| 2216 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic) |
| 2217 DEFINE_TEST_NEON_3SAME(addp, Basic) |
| 2218 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic) |
| 2219 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic) |
| 2220 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic) |
| 2221 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic) |
| 2222 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic) |
| 2223 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic) |
| 2224 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic) |
| 2225 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic) |
| 2226 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic) |
| 2227 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic) |
| 2228 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic) |
| 2229 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic) |
| 2230 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic) |
| 2231 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic) |
| 2232 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic) |
| 2233 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic) |
| 2234 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic) |
| 2235 DEFINE_TEST_NEON_3SAME(uqadd, Basic) |
| 2236 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic) |
| 2237 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic) |
| 2238 DEFINE_TEST_NEON_3SAME(uqsub, Basic) |
| 2239 DEFINE_TEST_NEON_3SAME(cmhi, Basic) |
| 2240 DEFINE_TEST_NEON_3SAME(cmhs, Basic) |
| 2241 DEFINE_TEST_NEON_3SAME(ushl, Basic) |
| 2242 DEFINE_TEST_NEON_3SAME(uqshl, Basic) |
| 2243 DEFINE_TEST_NEON_3SAME(urshl, Basic) |
| 2244 DEFINE_TEST_NEON_3SAME(uqrshl, Basic) |
| 2245 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic) |
| 2246 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic) |
| 2247 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic) |
| 2248 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic) |
| 2249 DEFINE_TEST_NEON_3SAME(sub, Basic) |
| 2250 DEFINE_TEST_NEON_3SAME(cmeq, Basic) |
| 2251 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic) |
| 2252 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic) |
| 2253 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic) |
| 2254 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic) |
| 2255 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic) |
| 2256 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic) |
| 2257 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic) |
| 2258 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic) |
| 2259 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic) |
| 2260 DEFINE_TEST_NEON_3SAME_FP(facge, Basic) |
| 2261 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic) |
| 2262 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic) |
| 2263 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic) |
| 2264 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic) |
| 2265 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic) |
| 2266 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic) |
| 2267 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic) |
| 2268 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic) |
| 2269 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic) |
| 2270 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic) |
| 2271 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic) |
| 2272 |
| 2273 // Advanced SIMD scalar three same. |
// One line per mnemonic, all using the Basic input set. _SCALAR generates the
// B/H/S/D tests, _SCALAR_D only D, _SCALAR_HS only H and S, and _FP_SCALAR
// the floating-point S and D tests. The generated names (<mnemonic>_B, _H,
// _S, _D) are distinct from the vector test names generated for the same
// mnemonics by the DEFINE_TEST_NEON_3SAME* macros, which use vector format
// suffixes such as _8B or _2D.
| 2274 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic) |
| 2275 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic) |
| 2276 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic) |
| 2277 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic) |
| 2278 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic) |
| 2279 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic) |
| 2280 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic) |
| 2281 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic) |
| 2282 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic) |
| 2283 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic) |
| 2284 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic) |
| 2285 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic) |
| 2286 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic) |
| 2287 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic) |
| 2288 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic) |
| 2289 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic) |
| 2290 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic) |
| 2291 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic) |
| 2292 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic) |
| 2293 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic) |
| 2294 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic) |
| 2295 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic) |
| 2296 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic) |
| 2297 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic) |
| 2298 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic) |
| 2299 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic) |
| 2300 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic) |
| 2301 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic) |
| 2302 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic) |
| 2303 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic) |
| 2304 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic) |
| 2305 |
| 2306 // Advanced SIMD three different. |
// Each line expands a DEFINE_TEST_NEON_3DIFF_* macro (defined outside this
// section) for one instruction mnemonic with the named input data set. The
// LONG / WIDE / NARROW part of the macro name presumably reflects how the
// destination element size relates to the sources, and _SD / _8H presumably
// limit the tested arrangements -- confirm against the macro definitions.
| 2307 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic) |
| 2308 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic) |
| 2309 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic) |
| 2310 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic) |
| 2311 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic) |
| 2312 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic) |
| 2313 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic) |
| 2314 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic) |
| 2315 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic) |
| 2316 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic) |
| 2317 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic) |
| 2318 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic) |
| 2319 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic) |
| 2320 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic) |
| 2321 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic) |
| 2322 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic) |
| 2323 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic) |
| 2324 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic) |
| 2325 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic) |
| 2326 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic) |
| 2327 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic) |
| 2328 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic) |
| 2329 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic) |
| 2330 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic) |
| 2331 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic) |
| 2332 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic) |
| 2333 |
| 2334 // Advanced SIMD scalar three different. |
// Scalar forms of the saturating doubling multiply-long instructions; all three
// use the same DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD macro (defined outside
// this section) with the Basic input set.
| 2335 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic) |
| 2336 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic) |
| 2337 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic) |
| 2338 |
| 2339 // Advanced SIMD scalar pairwise. |
// addp is written out as an explicit SIMTEST instead of going through a
// DEFINE_TEST_NEON_* macro: it calls the 2DIFF helper directly with the D and
// 2D formats (presumably destination and source, in that order -- confirm
// against CALL_TEST_NEON_HELPER_2DIFF) and the kInput64bitsBasic inputs.
| 2340 SIMTEST(addp_SCALAR) { |
| 2341 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic); |
| 2342 } |
// The floating-point pairwise reductions are generated by macro, one test
// definition per mnemonic, using the Basic input set.
| 2343 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic) |
| 2344 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic) |
| 2345 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic) |
| 2346 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic) |
| 2347 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic) |
| 2348 |
| 2349 // Advanced SIMD shift by immediate. |
// Each line expands a DEFINE_TEST_NEON_2OPIMM* macro (defined outside this
// section). Arguments are: instruction mnemonic, input data set, and the name
// of the immediate set. TypeWidth / TypeWidthFromZero / TypeWidthFromZeroToWidth
// presumably describe the range of immediates relative to the lane width --
// confirm against the immediate tables.
| 2350 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth) |
| 2351 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth) |
| 2352 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth) |
| 2353 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth) |
| 2354 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero) |
| 2355 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero) |
| 2356 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth) |
| 2357 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth) |
| 2358 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth) |
| 2359 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth) |
| 2360 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero) |
| 2361 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, |
| 2362 TypeWidthFromZeroToWidth) |
| 2363 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth) |
| 2364 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth) |
| 2365 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth) |
| 2366 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth) |
| 2367 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth) |
| 2368 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth) |
| 2369 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero) |
| 2370 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero) |
| 2371 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero) |
| 2372 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth) |
| 2373 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth) |
| 2374 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth) |
| 2375 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth) |
| 2376 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero) |
| 2377 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, |
| 2378 TypeWidthFromZeroToWidth) |
| 2379 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth) |
| 2380 |
| 2381 // Advanced SIMD scalar shift by immediate. |
// Scalar counterparts of the vector shift-by-immediate tests. Arguments are:
// instruction mnemonic, input data set, and the name of the immediate set. The
// macros themselves are defined outside this section.
| 2382 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth) |
| 2383 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth) |
| 2384 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth) |
| 2385 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth) |
| 2386 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero) |
| 2387 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero) |
| 2388 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth) |
| 2389 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth) |
| 2390 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, |
| 2391 TypeWidthFromZeroToWidth) |
| 2392 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth) |
| 2393 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth) |
| 2394 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth) |
| 2395 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth) |
| 2396 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth) |
| 2397 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth) |
| 2398 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero) |
| 2399 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero) |
| 2400 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero) |
| 2401 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth) |
| 2402 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth) |
| 2403 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth) |
| 2404 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth) |
| 2405 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, |
| 2406 TypeWidthFromZeroToWidth) |
| 2407 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth) |
| 2408 |
| 2409 // Advanced SIMD two-register miscellaneous. |
// Each line expands a DEFINE_TEST_NEON_2* macro (defined outside this section)
// for one instruction mnemonic with the named input data set. Compare-against-
// zero instructions reuse the 2OPIMM macros with the Zero immediate set, and
// shll uses its own SHLL immediate set. The interleaved "covered by" comments
// mark instructions that get no separate test here because the fixed-point
// variant with fbits 0 already exercises them.
| 2410 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic) |
| 2411 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic) |
| 2412 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic) |
| 2413 DEFINE_TEST_NEON_2SAME(suqadd, Basic) |
| 2414 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic) |
| 2415 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic) |
| 2416 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic) |
| 2417 DEFINE_TEST_NEON_2SAME(sqabs, Basic) |
| 2418 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero) |
| 2419 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero) |
| 2420 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero) |
| 2421 DEFINE_TEST_NEON_2SAME(abs, Basic) |
| 2422 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic) |
| 2423 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic) |
| 2424 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions) |
| 2425 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions) |
| 2426 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions) |
| 2427 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions) |
| 2428 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions) |
| 2429 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions) |
| 2430 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions) |
| 2431 // SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0. |
| 2432 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero) |
| 2433 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero) |
| 2434 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero) |
| 2435 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic) |
| 2436 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions) |
| 2437 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions) |
| 2438 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions) |
| 2439 // FCVTZS (vector, integer) covered by FCVTZS (vector, fixed point) with fbits 0. |
| 2440 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic) |
| 2441 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic) |
| 2442 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic) |
| 2443 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic) |
| 2444 DEFINE_TEST_NEON_2SAME(usqadd, Basic) |
| 2445 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic) |
| 2446 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic) |
| 2447 DEFINE_TEST_NEON_2SAME(sqneg, Basic) |
| 2448 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero) |
| 2449 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero) |
| 2450 DEFINE_TEST_NEON_2SAME(neg, Basic) |
| 2451 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic) |
| 2452 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL) |
| 2453 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic) |
| 2454 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions) |
| 2455 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions) |
| 2456 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions) |
| 2457 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions) |
| 2458 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions) |
| 2459 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions) |
| 2460 // UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0. |
| 2461 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic) |
| 2462 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic) |
| 2463 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero) |
| 2464 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero) |
| 2465 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic) |
| 2466 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions) |
| 2467 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions) |
| 2468 // FCVTZU (vector, integer) covered by FCVTZU (vector, fixed point) with fbits 0. |
| 2469 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic) |
| 2470 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic) |
| 2471 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic) |
| 2472 |
| 2473 // Advanced SIMD scalar two-register miscellaneous. |
// Scalar counterparts of the two-register miscellaneous tests. Each line
// expands a macro defined outside this section; compare-against-zero
// instructions pass the Zero immediate set. The "covered by" comments mark
// scalar integer conversions that get no separate test here because the scalar
// fixed-point tests (TypeWidthFromZeroToWidth immediates) already exercise
// them with fbits 0.
| 2474 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic) |
| 2475 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic) |
| 2476 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero) |
| 2477 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero) |
| 2478 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero) |
| 2479 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic) |
| 2480 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic) |
| 2481 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions) |
| 2482 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions) |
| 2483 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions) |
| 2484 // SCVTF (scalar, integer) covered by SCVTF (scalar, fixed point) with fbits 0. |
| 2485 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero) |
| 2486 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero) |
| 2487 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero) |
| 2488 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions) |
| 2489 // FCVTZS (scalar, integer) covered by FCVTZS (scalar, fixed point) with fbits 0. |
| 2490 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic) |
| 2491 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic) |
| 2492 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic) |
| 2493 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic) |
| 2494 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero) |
| 2495 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero) |
| 2496 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic) |
| 2497 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic) |
| 2498 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic) |
// fcvtxn is written out as an explicit SIMTEST instead of going through a
// DEFINE_TEST_NEON_* macro: it calls the 2DIFF helper directly with the S and D
// formats (presumably destination and source, in that order -- confirm against
// CALL_TEST_NEON_HELPER_2DIFF) and the kInputDoubleConversions inputs.
| 2499 SIMTEST(fcvtxn_SCALAR) { |
| 2500 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions); |
| 2501 } |
| 2502 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions) |
| 2503 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions) |
| 2504 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions) |
| 2505 // UCVTF (scalar, integer) covered by UCVTF (scalar, fixed point) with fbits 0. |
| 2506 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero) |
| 2507 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero) |
| 2508 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions) |
| 2509 // FCVTZU (scalar, integer) covered by FCVTZU (scalar, fixed point) with fbits 0. |
| 2510 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic) |
| 2511 |
| 2512 // Advanced SIMD across lanes. |
// One macro expansion per reduction instruction, all with the Basic input set.
// Three macro flavours are used: ACROSS, ACROSS_LONG (saddlv/uaddlv) and
// ACROSS_FP (floating-point min/max); their definitions live outside this
// section.
| 2513 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic) |
| 2514 DEFINE_TEST_NEON_ACROSS(smaxv, Basic) |
| 2515 DEFINE_TEST_NEON_ACROSS(sminv, Basic) |
| 2516 DEFINE_TEST_NEON_ACROSS(addv, Basic) |
| 2517 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic) |
| 2518 DEFINE_TEST_NEON_ACROSS(umaxv, Basic) |
| 2519 DEFINE_TEST_NEON_ACROSS(uminv, Basic) |
| 2520 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic) |
| 2521 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic) |
| 2522 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic) |
| 2523 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic) |
| 2524 |
| 2525 // Advanced SIMD permute. |
// The permute instructions reuse the general DEFINE_TEST_NEON_3SAME macro
// (defined outside this section) with the Basic input set.
| 2526 DEFINE_TEST_NEON_3SAME(uzp1, Basic) |
| 2527 DEFINE_TEST_NEON_3SAME(trn1, Basic) |
| 2528 DEFINE_TEST_NEON_3SAME(zip1, Basic) |
| 2529 DEFINE_TEST_NEON_3SAME(uzp2, Basic) |
| 2530 DEFINE_TEST_NEON_3SAME(trn2, Basic) |
| 2531 DEFINE_TEST_NEON_3SAME(zip2, Basic) |
| 2532 |
| 2533 // Advanced SIMD vector x indexed element. |
// Each line expands a *_BYELEMENT* macro (defined outside this section) for one
// instruction mnemonic. The three trailing arguments each name an input data
// set (all Basic here); presumably one per register operand -- confirm against
// the macro definitions.
| 2534 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic) |
| 2535 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic) |
| 2536 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic) |
| 2537 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic) |
| 2538 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic) |
| 2539 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic) |
| 2540 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic) |
| 2541 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic) |
| 2542 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic) |
| 2543 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic) |
| 2544 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic) |
| 2545 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic) |
| 2546 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic) |
| 2547 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic) |
| 2548 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic) |
| 2549 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic) |
| 2550 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic) |
| 2551 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic) |
| 2552 |
| 2553 // Advanced SIMD scalar x indexed element. |
// Scalar counterparts of the by-element tests. Each line expands a
// *_BYELEMENT*_SCALAR macro (defined outside this section) for one instruction
// mnemonic, passing three input data set names (all Basic here).
| 2554 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic) |
| 2555 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic) |
| 2556 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic) |
| 2557 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic) |
| 2558 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic) |
| 2559 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic) |
| 2560 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic) |
| 2561 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic) |
| 2562 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic) |
OLD | NEW |