Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <stdio.h> | |
| 6 #include <stdlib.h> | |
| 7 #include <string.h> | |
| 8 #include <cmath> | |
| 9 #include <limits> | |
| 10 | |
| 11 #include "src/arm64/decoder-arm64-inl.h" | |
| 12 #include "src/arm64/disasm-arm64.h" | |
| 13 #include "src/arm64/simulator-arm64.h" | |
| 14 #include "src/arm64/utils-arm64.h" | |
| 15 #include "src/base/platform/platform.h" | |
| 16 #include "src/base/utils/random-number-generator.h" | |
| 17 #include "src/macro-assembler.h" | |
| 18 #include "test/cctest/cctest.h" | |
| 19 #include "test/cctest/test-simulator-inputs-arm64.h" | |
| 20 #include "test/cctest/test-simulator-traces-arm64.h" | |
| 21 #include "test/cctest/test-utils-arm64.h" | |
| 22 | |
| 23 using namespace v8::internal; | |
| 24 | |
| 25 // Test infrastructure. | |
| 26 // | |
| 27 // Tests are functions which accept no parameters and have no return values. | |
| 28 // The testing code should not perform an explicit return once completed. For | |
| 29 // example to test the mov immediate instruction a very simple test would be: | |
| 30 // | |
| 31 // SIMTEST(mov_x0_one) { | |
| 32 // SETUP(); | |
| 33 // | |
| 34 // START(); | |
| 35 // __ mov(x0, Operand(1)); | |
| 36 // END(); | |
| 37 // | |
| 38 // RUN(); | |
| 39 // | |
| 40 // CHECK_EQUAL_64(1, x0); | |
| 41 // | |
| 42 // TEARDOWN(); | |
| 43 // } | |
| 44 // | |
| 45 // Within a START ... END block all registers but sp can be modified. sp has to | |
| 46 // be explicitly saved/restored. The END() macro replaces the function return | |
| 47 // so it may appear multiple times in a test if the test has multiple exit | |
| 48 // points. | |
| 49 // | |
| 50 // Once the test has been run all integer and floating point registers as well | |
| 51 // as flags are accessible through a RegisterDump instance, see | |
| 52 // test-utils-arm64.h for more info on RegisterDump. | |
|
bbudge
2017/01/31 01:41:32
It seems to be in test-utils-arm64.h
martyn.capewell
2017/02/03 11:01:31
Done.
| |
| 53 // | |
| 54 // We provide some helper asserts to handle common cases: | |
| 55 // | |
| 56 // CHECK_EQUAL_32(int32_t, int32_t) | |
|
bbudge
2017/01/31 01:41:32
nit int32_t
martyn.capewell
2017/02/03 11:01:31
Done.
| |
| 57 // CHECK_EQUAL_FP32(float, float) | |
| 58 // CHECK_EQUAL_32(int32_t, W register) | |
| 59 // CHECK_EQUAL_FP32(float, S register) | |
| 60 // CHECK_EQUAL_64(int64_t, int64_t) | |
|
bbudge
2017/01/31 01:41:32
int64_t
martyn.capewell
2017/02/03 11:01:31
Done.
| |
| 61 // CHECK_EQUAL_FP64(double, double) | |
| 62 // CHECK_EQUAL_64(int64_t, X register) | |
| 63 // CHECK_EQUAL_64(X register, X register) | |
| 64 // CHECK_EQUAL_FP64(double, D register) | |
| 65 // | |
| 66 // e.g. CHECK_EQUAL_FP64(0.5, d30); | |
| 67 // | |
| 68 // If more advanced computation is required before the assert then access the | |
| 69 // RegisterDump named core directly: | |
| 70 // | |
| 71 // CHECK_EQUAL_64(0x1234, core.xreg(0) & 0xffff); | |
| 72 | |
| 73 #if 0 // TODO(all): enable. | |
| 74 static v8::Persistent<v8::Context> env; | |
| 75 | |
| 76 static void InitializeVM() { | |
| 77 if (env.IsEmpty()) { | |
| 78 env = v8::Context::New(); | |
| 79 } | |
| 80 } | |
| 81 #endif | |
| 82 | |
| 83 #define __ masm. | |
| 84 #define SIMTEST(name) TEST(SIM_##name) | |
| 85 | |
| 86 #define BUF_SIZE 8192 | |
| 87 #define SETUP() SETUP_SIZE(BUF_SIZE) | |
| 88 | |
| 89 #define INIT_V8() CcTest::InitializeVM(); | |
| 90 | |
| 91 #ifdef USE_SIMULATOR | |
| 92 | |
| 93 // Run tests with the simulator. | |
| 94 #define SETUP_SIZE(buf_size) \ | |
| 95 Isolate* isolate = CcTest::i_isolate(); \ | |
| 96 HandleScope scope(isolate); \ | |
| 97 CHECK(isolate != NULL); \ | |
| 98 byte* buf = new byte[buf_size]; \ | |
| 99 MacroAssembler masm(isolate, buf, buf_size, \ | |
| 100 v8::internal::CodeObjectRequired::kYes); \ | |
| 101 Decoder<DispatchingDecoderVisitor>* decoder = \ | |
| 102 new Decoder<DispatchingDecoderVisitor>(); \ | |
| 103 Simulator simulator(decoder); \ | |
| 104 RegisterDump core; | |
| 105 | |
| 106 // Reset the assembler and simulator, so that instructions can be generated, | |
| 107 // but don't actually emit any code. This can be used by tests that need to | |
| 108 // emit instructions at the start of the buffer. Note that START_AFTER_RESET | |
| 109 // must be called before any callee-saved register is modified, and before an | |
| 110 // END is encountered. | |
| 111 // | |
| 112 // Most tests should call START, rather than call RESET directly. | |
| 113 #define RESET() \ | |
| 114 __ Reset(); \ | |
| 115 simulator.ResetState(); | |
| 116 | |
| 117 #define START_AFTER_RESET() \ | |
| 118 __ SetStackPointer(csp); \ | |
| 119 __ PushCalleeSavedRegisters(); \ | |
| 120 __ Debug("Start test.", __LINE__, TRACE_ENABLE | LOG_ALL); | |
| 121 | |
| 122 #define START() \ | |
| 123 RESET(); \ | |
| 124 START_AFTER_RESET(); | |
| 125 | |
| 126 #define RUN() simulator.RunFrom(reinterpret_cast<Instruction*>(buf)) | |
| 127 | |
| 128 #define END() \ | |
| 129 __ Debug("End test.", __LINE__, TRACE_DISABLE | LOG_ALL); \ | |
| 130 core.Dump(&masm); \ | |
| 131 __ PopCalleeSavedRegisters(); \ | |
| 132 __ Ret(); \ | |
| 133 __ GetCode(NULL); | |
| 134 | |
| 135 #define TEARDOWN() delete[] buf; | |
| 136 | |
| 137 #else // ifdef USE_SIMULATOR. | |
| 138 // Run the test on real hardware or models. | |
| 139 #define SETUP_SIZE(buf_size) \ | |
| 140 Isolate* isolate = CcTest::i_isolate(); \ | |
| 141 HandleScope scope(isolate); \ | |
| 142 CHECK(isolate != NULL); \ | |
| 143 size_t actual_size; \ | |
| 144 byte* buf = static_cast<byte*>( \ | |
| 145 v8::base::OS::Allocate(buf_size, &actual_size, true)); \ | |
| 146 MacroAssembler masm(isolate, buf, actual_size, \ | |
| 147 v8::internal::CodeObjectRequired::kYes); \ | |
| 148 RegisterDump core; | |
| 149 | |
| 150 #define RESET() \ | |
| 151 __ Reset(); \ | |
| 152 /* Reset the machine state (like simulator.ResetState()). */ \ | |
| 153 __ Msr(NZCV, xzr); \ | |
| 154 __ Msr(FPCR, xzr); | |
| 155 | |
| 156 #define START_AFTER_RESET() \ | |
| 157 __ SetStackPointer(csp); \ | |
| 158 __ PushCalleeSavedRegisters(); | |
| 159 | |
| 160 #define START() \ | |
| 161 RESET(); \ | |
| 162 START_AFTER_RESET(); | |
| 163 | |
| 164 #define RUN() \ | |
| 165 Assembler::FlushICache(isolate, buf, masm.SizeOfGeneratedCode()); \ | |
| 166 { \ | |
| 167 void (*test_function)(void); \ | |
| 168 memcpy(&test_function, &buf, sizeof(buf)); \ | |
| 169 test_function(); \ | |
| 170 } | |
| 171 | |
| 172 #define END() \ | |
| 173 core.Dump(&masm); \ | |
| 174 __ PopCalleeSavedRegisters(); \ | |
| 175 __ Ret(); \ | |
| 176 __ GetCode(NULL); | |
| 177 | |
| 178 #define TEARDOWN() v8::base::OS::Free(buf, actual_size); | |
| 179 | |
| 180 #endif // ifdef USE_SIMULATOR. | |
| 181 | |
| 182 #define CHECK_EQUAL_NZCV(expected) CHECK(EqualNzcv(expected, core.flags_nzcv())) | |
| 183 | |
| 184 #define CHECK_EQUAL_REGISTERS(expected) CHECK(EqualRegisters(&expected, &core)) | |
| 185 | |
| 186 #define CHECK_EQUAL_32(expected, result) \ | |
| 187 CHECK(Equal32(static_cast<uint32_t>(expected), &core, result)) | |
| 188 | |
| 189 #define CHECK_EQUAL_FP32(expected, result) \ | |
| 190 CHECK(EqualFP32(expected, &core, result)) | |
| 191 | |
| 192 #define CHECK_EQUAL_64(expected, result) CHECK(Equal64(expected, &core, result)) | |
| 193 | |
| 194 #define CHECK_EQUAL_FP64(expected, result) \ | |
| 195 CHECK(EqualFP64(expected, &core, result)) | |
| 196 | |
| 197 #ifdef DEBUG | |
| 198 #define CHECK_LITERAL_POOL_SIZE(expected) \ | |
| 199 CHECK((expected) == (__ LiteralPoolSize())) | |
| 200 #else | |
| 201 #define CHECK_LITERAL_POOL_SIZE(expected) ((void)0) | |
| 202 #endif | |
| 203 | |
| 204 // The maximum number of errors to report in detail for each test. | |
| 205 static const unsigned kErrorReportLimit = 8; | |
| 206 | |
| 207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd, | |
| 208 const VRegister& vn); | |
| 209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd, | |
| 210 const VRegister& vn, | |
| 211 const VRegister& vm); | |
| 212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd, | |
| 213 const VRegister& vn, | |
| 214 const VRegister& vm, | |
| 215 int vm_index); | |
| 216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)( | |
| 217 const VRegister& vd, int imm1, const VRegister& vn, int imm2); | |
| 218 | |
| 219 // Wrapper that lets helper routines use a single template parameter T for both | |
| 220 // the immediate type and the matching MacroAssembler member-function pointer type ('mnemonic'). | |
| 221 template <typename T> | |
| 222 class Test2OpImmediateNEONHelper_t { | |
| 223 public: | |
| 224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd, | |
| 225 const VRegister& vn, T imm); | |
| 226 }; | |
| 227 | |
| 228 namespace { | |
| 229 | |
| 230 // Returns the number of hex digits needed to print a value of the wider of the | |
| 231 // two template types: its size in bytes times 8 bits, at 4 bits per hex digit. | |
| 232 template <typename Ta, typename Tb> | |
| 233 unsigned MaxHexCharCount() { | |
| 234 unsigned count = static_cast<unsigned>(std::max(sizeof(Ta), sizeof(Tb))); | |
| 235 return (count * 8) / 4; | |
| 236 } | |
| 237 | |
| 238 // ==== Tests for instructions of the form <INST> VReg, VReg. ==== | |
| 239 | |
| 240 void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, | |
| 241 unsigned inputs_n_length, uintptr_t results, | |
| 242 VectorFormat vd_form, VectorFormat vn_form) { | |
| 243 DCHECK_NE(vd_form, kFormatUndefined); | |
| 244 DCHECK_NE(vn_form, kFormatUndefined); | |
| 245 | |
| 246 SETUP(); | |
| 247 START(); | |
| 248 | |
| 249 // Emit a single loop over the inputs rather than unrolled code, to keep the code size down. | |
| 250 Label loop_n; | |
| 251 | |
| 252 Register out = x0; | |
| 253 Register inputs_n_base = x1; | |
| 254 Register inputs_n_last_16bytes = x3; | |
| 255 Register index_n = x5; | |
| 256 | |
| 257 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
| 258 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 259 | |
| 260 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
| 261 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 262 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 263 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
| 264 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
| 265 | |
| 266 // vd is the view of v0, vd_bits wide, that is stored to 'out' after each step; | |
| 267 // vn (v1) and vntmp (v3) are 16B views used by the Ldr and Ext below. | |
| 268 VRegister vd = VRegister::Create(0, vd_bits); | |
| 269 VRegister vn = v1.V16B(); | |
| 270 VRegister vntmp = v3.V16B(); | |
| 271 | |
| 272 // Views of v0 and v1 built from vd_form/vn_form, for use when calling 'helper'. | |
| 273 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); | |
| 274 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
| 275 | |
| 276 // vntmp_single is the one-lane view of v3 ('Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D') that receives each newly loaded input lane. | |
| 277 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
| 278 | |
| 279 __ Mov(out, results); | |
| 280 | |
| 281 __ Mov(inputs_n_base, inputs_n); | |
| 282 __ Mov(inputs_n_last_16bytes, | |
| 283 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); | |
| 284 | |
| 285 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
| 286 | |
| 287 __ Mov(index_n, 0); | |
| 288 __ Bind(&loop_n); | |
| 289 | |
| 290 __ Ldr(vntmp_single, | |
| 291 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
| 292 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
| 293 | |
| 294 // Set the destination to zero before running the instruction under test. | |
| 295 | |
| 296 // TODO(all): Setting the destination to values other than zero might be a | |
| 297 // better test for instructions such as sqxtn2 which may leave parts of V | |
| 298 // registers unchanged. | |
| 299 __ Movi(vd.V16B(), 0); | |
| 300 | |
| 301 (masm.*helper)(vd_helper, vn_helper); | |
| 302 | |
| 303 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
| 304 | |
| 305 __ Add(index_n, index_n, 1); | |
| 306 __ Cmp(index_n, inputs_n_length); | |
| 307 __ B(lo, &loop_n); | |
| 308 | |
| 309 END(); | |
| 310 RUN(); | |
| 311 TEARDOWN(); | |
| 312 } | |
| 313 | |
| 314 // Runs 'helper' on every entry of inputs_n[] via Test1OpNEON_Helper, then either | |
| 315 // prints the results as a kExpected_NEON_<name> table (when CcTest::sim_test_trace() is set) | |
| 316 // or compares them with expected[]. The arrays hold raw bit patterns, so comparisons are bit-exact. | |
| 317 template <typename Td, typename Tn> | |
| 318 void Test1OpNEON(const char* name, Test1OpNEONHelper_t helper, | |
| 319 const Tn inputs_n[], unsigned inputs_n_length, | |
| 320 const Td expected[], unsigned expected_length, | |
| 321 VectorFormat vd_form, VectorFormat vn_form) { | |
| 322 DCHECK_GT(inputs_n_length, 0U); | |
| 323 | |
| 324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 325 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 326 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 327 | |
| 328 const unsigned results_length = inputs_n_length; | |
| 329 Td* results = new Td[results_length * vd_lane_count]; | |
|
bbudge
2017/01/31 01:41:31
Use std::vector rather than manage raw array point
martyn.capewell
2017/02/03 11:01:31
I could do this, but I'd need to push dummy values
bbudge
2017/02/08 01:39:11
If the types Td are default constructible, you can
martyn.capewell
2017/02/15 11:51:00
Done.
| |
| 330 const unsigned lane_bit = sizeof(Td) * 8; | |
| 331 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
| 332 | |
| 333 Test1OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n), | |
| 334 inputs_n_length, reinterpret_cast<uintptr_t>(results), | |
| 335 vd_form, vn_form); | |
| 336 | |
| 337 if (CcTest::sim_test_trace()) { | |
| 338 // Trace mode: print the results as a C array definition instead of checking them. | |
| 339 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
| 340 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
| 341 printf(" "); | |
| 342 // Output a separate result for each element of the result vector. | |
| 343 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 344 unsigned index = lane + (iteration * vd_lane_count); | |
| 345 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
| 346 static_cast<uint64_t>(results[index])); | |
| 347 } | |
| 348 printf("\n"); | |
| 349 } | |
| 350 | |
| 351 printf("};\n"); | |
| 352 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
| 353 results_length); | |
| 354 } else { | |
| 355 // Check the results lane by lane, printing details for at most kErrorReportLimit failing vectors. | |
| 356 CHECK(expected_length == results_length); | |
| 357 unsigned error_count = 0; | |
| 358 unsigned d = 0; | |
| 359 const char* padding = " "; | |
| 360 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); | |
| 361 for (unsigned n = 0; n < inputs_n_length; n++, d++) { | |
| 362 bool error_in_vector = false; | |
| 363 | |
| 364 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 365 unsigned output_index = (n * vd_lane_count) + lane; | |
| 366 | |
| 367 if (results[output_index] != expected[output_index]) { | |
| 368 error_in_vector = true; | |
| 369 break; | |
| 370 } | |
| 371 } | |
| 372 | |
| 373 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
| 374 printf("%s\n", name); | |
| 375 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, | |
| 376 lane_len_in_hex + 1, padding); | |
| 377 | |
| 378 const unsigned first_index_n = | |
| 379 inputs_n_length - (16 / vn_lane_bytes) + n + 1; | |
| 380 | |
| 381 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); | |
| 382 lane++) { | |
| 383 unsigned output_index = (n * vd_lane_count) + lane; | |
| 384 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; | |
| 385 | |
| 386 printf( | |
| 387 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
| 388 " " | |
| 389 "| 0x%0*" PRIx64 "\n", | |
| 390 results[output_index] != expected[output_index] ? '*' : ' ', | |
| 391 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
| 392 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
| 393 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
| 394 } | |
| 395 } | |
| 396 } | |
| 397 DCHECK_EQ(d, expected_length); | |
| 398 if (error_count > kErrorReportLimit) { | |
| 399 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
| 400 } | |
| 401 DCHECK_EQ(error_count, 0U); | |
| 402 } | |
| 403 delete[] results; | |
| 404 } | |
| 405 | |
| 406 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ==== | |
| 407 // where <V> is one of B, H, S or D registers. | |
| 408 // e.g. saddlv H1, v0.8B | |
| 409 | |
| 410 // TODO(all): Change tests to store all lanes of the resulting V register. | |
| 411 // Some tests store all 128 bits of the resulting V register to | |
| 412 // check the simulator's behaviour on the rest of the register. | |
| 413 // This is better than storing the affected lanes only. | |
| 414 // Change any tests such as the 'Across' template to do the same. | |
| 415 | |
| 416 void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, | |
| 417 unsigned inputs_n_length, uintptr_t results, | |
| 418 VectorFormat vd_form, VectorFormat vn_form) { | |
| 419 DCHECK_NE(vd_form, kFormatUndefined); | |
| 420 DCHECK_NE(vn_form, kFormatUndefined); | |
| 421 | |
| 422 SETUP(); | |
| 423 START(); | |
| 424 | |
| 425 // Emit a single loop over the inputs rather than unrolled code, to keep the code size down. | |
| 426 Label loop_n; | |
| 427 | |
| 428 Register out = x0; | |
| 429 Register inputs_n_base = x1; | |
| 430 Register inputs_n_last_vector = x3; | |
| 431 Register index_n = x5; | |
| 432 | |
| 433 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
| 434 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
| 435 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 436 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 437 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
| 438 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
| 439 | |
| 440 // Test destructive operations by (arbitrarily) using the same register as | |
| 441 // source and destination when the destination is B- or S-sized. | |
| 442 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize); | |
| 443 | |
| 444 // Create two aliases for v0: vd (vd_bits wide) is the destination for the | |
| 445 // tested instruction, and vdstr is the whole Q register, which is what gets | |
| 446 // stored so that the full 128-bit result can be checked. vn (v1) and vntmp (v3) | |
| 447 // are vn_bits-wide views of the input registers. | |
| 448 VRegister vd = VRegister::Create(0, vd_bits); | |
| 449 VRegister vdstr = VRegister::Create(0, kQRegSizeInBits); | |
| 450 | |
| 451 VRegister vn = VRegister::Create(1, vn_bits); | |
| 452 VRegister vntmp = VRegister::Create(3, vn_bits); | |
| 453 | |
| 454 // Views of v0 and v1 in the vn_form layout, for use when calling 'helper' (vd_helper is the source in the destructive case). | |
| 455 VRegister vd_helper = VRegister::Create(0, vn_bits, vn_lane_count); | |
| 456 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
| 457 | |
| 458 // vntmp_single is the one-lane view of v3 ('Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D') that receives each newly loaded input lane. | |
| 459 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
| 460 | |
| 461 // Byte-lane (8B or 16B) views of the same registers for use in the 'ext' instructions. | |
| 462 VRegister vn_ext = (kDRegSizeInBits == vn_bits) ? vn.V8B() : vn.V16B(); | |
| 463 VRegister vntmp_ext = | |
| 464 (kDRegSizeInBits == vn_bits) ? vntmp.V8B() : vntmp.V16B(); | |
| 465 | |
| 466 __ Mov(out, results); | |
| 467 | |
| 468 __ Mov(inputs_n_base, inputs_n); | |
| 469 __ Mov(inputs_n_last_vector, | |
| 470 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); | |
| 471 | |
| 472 __ Ldr(vn, MemOperand(inputs_n_last_vector)); | |
| 473 | |
| 474 __ Mov(index_n, 0); | |
| 475 __ Bind(&loop_n); | |
| 476 | |
| 477 __ Ldr(vntmp_single, | |
| 478 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
| 479 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); | |
| 480 | |
| 481 if (destructive) { | |
| 482 __ Mov(vd_helper, vn_helper); | |
| 483 (masm.*helper)(vd, vd_helper); | |
| 484 } else { | |
| 485 (masm.*helper)(vd, vn_helper); | |
| 486 } | |
| 487 | |
| 488 __ Str(vdstr, MemOperand(out, kQRegSize, PostIndex)); | |
| 489 | |
| 490 __ Add(index_n, index_n, 1); | |
| 491 __ Cmp(index_n, inputs_n_length); | |
| 492 __ B(lo, &loop_n); | |
| 493 | |
| 494 END(); | |
| 495 RUN(); | |
| 496 TEARDOWN(); | |
| 497 } | |
| 498 | |
| 499 // Runs an 'across lanes' NEON instruction (via Test1OpAcrossNEON_Helper) on every entry | |
| 500 // of inputs_n[]. In trace mode the results are printed as a kExpected_NEON_<name> table; | |
| 501 // otherwise they are compared bit-exactly with expected[] (arrays hold raw bit patterns). | |
| 502 template <typename Td, typename Tn> | |
| 503 void Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper, | |
| 504 const Tn inputs_n[], unsigned inputs_n_length, | |
| 505 const Td expected[], unsigned expected_length, | |
| 506 VectorFormat vd_form, VectorFormat vn_form) { | |
| 507 DCHECK_GT(inputs_n_length, 0U); | |
| 508 | |
| 509 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 510 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form); | |
| 511 | |
| 512 const unsigned results_length = inputs_n_length; | |
| 513 Td* results = new Td[results_length * vd_lanes_per_q]; | |
| 514 const unsigned lane_bit = sizeof(Td) * 8; | |
| 515 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
| 516 | |
| 517 Test1OpAcrossNEON_Helper( | |
| 518 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
| 519 reinterpret_cast<uintptr_t>(results), vd_form, vn_form); | |
| 520 | |
| 521 if (CcTest::sim_test_trace()) { | |
| 522 // Print the results. The helper stores a whole Q register per iteration, so results[] is strided by vd_lanes_per_q. | |
| 523 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
| 524 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
| 525 printf(" "); | |
| 526 // Output a separate result for each element of the result vector. | |
| 527 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 528 unsigned index = lane + (iteration * vd_lanes_per_q); | |
| 529 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
| 530 static_cast<uint64_t>(results[index])); | |
| 531 } | |
| 532 printf("\n"); | |
| 533 } | |
| 534 | |
| 535 printf("};\n"); | |
| 536 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
| 537 results_length); | |
| 538 } else { | |
| 539 // Check the results: expected[] is packed (vd_lane_count per input), results[] holds a full Q register per input. | |
| 540 CHECK(expected_length == results_length); | |
| 541 unsigned error_count = 0; | |
| 542 unsigned d = 0; | |
| 543 const char* padding = " "; | |
| 544 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); | |
| 545 for (unsigned n = 0; n < inputs_n_length; n++, d++) { | |
| 546 bool error_in_vector = false; | |
| 547 | |
| 548 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 549 unsigned expected_index = (n * vd_lane_count) + lane; | |
| 550 unsigned results_index = (n * vd_lanes_per_q) + lane; | |
| 551 | |
| 552 if (results[results_index] != expected[expected_index]) { | |
| 553 error_in_vector = true; | |
| 554 break; | |
| 555 } | |
| 556 } | |
| 557 // For across operations, the remaining lanes should be zero. | |
| 558 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) { | |
| 559 unsigned results_index = (n * vd_lanes_per_q) + lane; | |
| 560 if (results[results_index] != 0) { | |
| 561 error_in_vector = true; | |
| 562 break; | |
| 563 } | |
| 564 } | |
| 565 | |
| 566 | |
| 567 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
| 568 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 569 | |
| 570 printf("%s\n", name); | |
| 571 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, | |
| 572 lane_len_in_hex + 1, padding); | |
| 573 | |
| 574 // TODO(all): In case of an error, all tests print out as many elements | |
| 575 // as there are lanes in the output or input vectors. This way the | |
| 576 // viewer can read all the values that were needed for the operation | |
| 577 // but the output contains also unnecessary values. These prints can be | |
| 578 // improved according to the arguments passed to test functions. | |
| 579 // This output for the 'Across' category has the required modifications. | |
|
bbudge
2017/01/31 01:41:31
It's not clear what is "to be done" here.
martyn.capewell
2017/02/03 11:01:31
I think it's saying that, on error, it prints out
bbudge
2017/02/08 01:39:11
OK
| |
| 580 for (unsigned lane = 0; lane < vn_lane_count; lane++) { | |
| 581 unsigned results_index = | |
| 582 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); | |
| 583 unsigned input_index_n = | |
| 584 (inputs_n_length - vn_lane_count + n + 1 + lane) % | |
| 585 inputs_n_length; | |
| 586 | |
| 587 Td expect = 0; | |
| 588 if ((vn_lane_count - 1) == lane) { | |
| 589 // This is the last lane to be printed, ie. the least-significant | |
| 590 // lane, so use the expected value; any other lane should be zero. | |
| 591 unsigned expected_index = n * vd_lane_count; | |
| 592 expect = expected[expected_index]; | |
| 593 } | |
| 594 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
| 595 results[results_index] != expect ? '*' : ' ', lane_len_in_hex, | |
| 596 static_cast<uint64_t>(inputs_n[input_index_n]), | |
| 597 lane_len_in_hex, static_cast<uint64_t>(results[results_index]), | |
| 598 lane_len_in_hex, static_cast<uint64_t>(expect)); | |
| 599 } | |
| 600 } | |
| 601 } | |
| 602 DCHECK_EQ(d, expected_length); | |
| 603 if (error_count > kErrorReportLimit) { | |
| 604 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
| 605 } | |
| 606 DCHECK_EQ(error_count, 0U); | |
| 607 } | |
| 608 delete[] results; | |
| 609 } | |
| 610 | |
| 611 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ==== | |
| 612 // Emits and runs a nested loop that applies 'helper' to every (n, m) input pair and stores each result register to 'results'. | |
| 613 void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d, | |
| 614 uintptr_t inputs_n, unsigned inputs_n_length, | |
| 615 uintptr_t inputs_m, unsigned inputs_m_length, | |
| 616 uintptr_t results, VectorFormat vd_form, | |
| 617 VectorFormat vn_form, VectorFormat vm_form) { | |
| 618 DCHECK_NE(vd_form, kFormatUndefined); | |
| 619 DCHECK_NE(vn_form, kFormatUndefined); | |
| 620 DCHECK_NE(vm_form, kFormatUndefined); | |
| 621 | |
| 622 SETUP(); | |
| 623 START(); | |
| 624 | |
| 625 // Emit rolled-up loops over the inputs rather than unrolled code, to keep the code size down. | |
| 626 Label loop_n, loop_m; | |
| 627 | |
| 628 Register out = x0; | |
| 629 Register inputs_n_base = x1; | |
| 630 Register inputs_m_base = x2; | |
| 631 Register inputs_d_base = x3; | |
| 632 Register inputs_n_last_16bytes = x4; | |
| 633 Register inputs_m_last_16bytes = x5; | |
| 634 Register index_n = x6; | |
| 635 Register index_m = x7; | |
| 636 | |
| 637 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
| 638 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 639 | |
| 640 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
| 641 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 642 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 643 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
| 644 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
| 645 | |
| 646 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); | |
| 647 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); | |
| 648 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); | |
| 649 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); | |
| 650 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); | |
| 651 | |
| 652 // Always load and store 128 bits regardless of the format, using 16B views of v0-v5. | |
| 653 VRegister vd = v0.V16B(); | |
| 654 VRegister vn = v1.V16B(); | |
| 655 VRegister vm = v2.V16B(); | |
| 656 VRegister vntmp = v3.V16B(); | |
| 657 VRegister vmtmp = v4.V16B(); | |
| 658 VRegister vres = v5.V16B(); | |
| 659 | |
| 660 // Views of v1, v2 and v5 built from vn_form/vm_form/vd_form, for calling the 'helper'. | |
| 661 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
| 662 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); | |
| 663 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
| 664 | |
| 665 // 'v*tmp_single' are one-lane views ('Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D') that receive each newly loaded input lane. | |
| 666 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
| 667 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); | |
| 668 | |
| 669 __ Mov(out, results); | |
| 670 | |
| 671 __ Mov(inputs_d_base, inputs_d); | |
| 672 // The *_length arguments count lanes, so scale them by the lane size to find the last 16 bytes of each array. | |
| 673 __ Mov(inputs_n_base, inputs_n); | |
| 674 __ Mov(inputs_n_last_16bytes, inputs_n + (vn_lane_bytes * inputs_n_length) - 16); | |
| 675 __ Mov(inputs_m_base, inputs_m); | |
| 676 __ Mov(inputs_m_last_16bytes, inputs_m + (vm_lane_bytes * inputs_m_length) - 16); | |
| 677 | |
| 678 __ Ldr(vd, MemOperand(inputs_d_base)); | |
| 679 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
| 680 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); | |
| 681 | |
| 682 __ Mov(index_n, 0); | |
| 683 __ Bind(&loop_n); | |
| 684 | |
| 685 __ Ldr(vntmp_single, | |
| 686 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
| 687 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
| 688 | |
| 689 __ Mov(index_m, 0); | |
| 690 __ Bind(&loop_m); | |
| 691 | |
| 692 __ Ldr(vmtmp_single, | |
| 693 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); | |
| 694 __ Ext(vm, vm, vmtmp, vm_lane_bytes); | |
| 695 | |
| 696 __ Mov(vres, vd); | |
| 697 | |
| 698 (masm.*helper)(vres_helper, vn_helper, vm_helper); | |
| 699 | |
| 700 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
| 701 | |
| 702 __ Add(index_m, index_m, 1); | |
| 703 __ Cmp(index_m, inputs_m_length); | |
| 704 __ B(lo, &loop_m); | |
| 705 | |
| 706 __ Add(index_n, index_n, 1); | |
| 707 __ Cmp(index_n, inputs_n_length); | |
| 708 __ B(lo, &loop_n); | |
| 709 | |
| 710 END(); | |
| 711 RUN(); | |
| 712 TEARDOWN(); | |
| 713 } | |
| 714 | |
| 715 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
| 716 // arrays of rawbit representation of input values. This ensures that | |
| 717 // exact bit comparisons can be performed. | |
| 718 template <typename Td, typename Tn, typename Tm> | |
| 719 void Test2OpNEON(const char* name, Test2OpNEONHelper_t helper, | |
| 720 const Td inputs_d[], const Tn inputs_n[], | |
| 721 unsigned inputs_n_length, const Tm inputs_m[], | |
| 722 unsigned inputs_m_length, const Td expected[], | |
| 723 unsigned expected_length, VectorFormat vd_form, | |
| 724 VectorFormat vn_form, VectorFormat vm_form) { | |
| 725 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); | |
| 726 | |
| 727 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); | |
| 728 | |
| 729 const unsigned results_length = inputs_n_length * inputs_m_length; | |
| 730 Td* results = new Td[results_length * vd_lane_count]; | |
| 731 const unsigned lane_bit = sizeof(Td) * 8; | |
| 732 const unsigned lane_len_in_hex = | |
| 733 static_cast<unsigned>(std::max(sizeof(Td), sizeof(Tm)) * 8) / 4; | |
| 734 | |
| 735 Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d), | |
| 736 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
| 737 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, | |
| 738 reinterpret_cast<uintptr_t>(results), vd_form, vn_form, | |
| 739 vm_form); | |
| 740 | |
| 741 if (CcTest::sim_test_trace()) { | |
| 742 // Print the results. | |
| 743 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
| 744 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
| 745 printf(" "); | |
| 746 // Output a separate result for each element of the result vector. | |
| 747 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 748 unsigned index = lane + (iteration * vd_lane_count); | |
| 749 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
| 750 static_cast<uint64_t>(results[index])); | |
| 751 } | |
| 752 printf("\n"); | |
| 753 } | |
| 754 | |
| 755 printf("};\n"); | |
| 756 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
| 757 results_length); | |
| 758 } else { | |
| 759 // Check the results. | |
| 760 CHECK(expected_length == results_length); | |
| 761 unsigned error_count = 0; | |
| 762 unsigned d = 0; | |
| 763 const char* padding = " "; | |
| 764 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
| 765 for (unsigned n = 0; n < inputs_n_length; n++) { | |
| 766 for (unsigned m = 0; m < inputs_m_length; m++, d++) { | |
| 767 bool error_in_vector = false; | |
| 768 | |
| 769 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 770 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
| 771 (m * vd_lane_count) + lane; | |
| 772 | |
| 773 if (results[output_index] != expected[output_index]) { | |
| 774 error_in_vector = true; | |
| 775 break; | |
| 776 } | |
| 777 } | |
| 778 | |
| 779 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
| 780 printf("%s\n", name); | |
| 781 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n", | |
| 782 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, | |
| 783 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); | |
| 784 | |
| 785 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 786 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
| 787 (m * vd_lane_count) + lane; | |
| 788 unsigned input_index_n = | |
| 789 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
| 790 inputs_n_length; | |
| 791 unsigned input_index_m = | |
| 792 (inputs_m_length - vd_lane_count + m + 1 + lane) % | |
| 793 inputs_m_length; | |
| 794 | |
| 795 printf( | |
| 796 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
| 797 " " | |
| 798 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
| 799 results[output_index] != expected[output_index] ? '*' : ' ', | |
| 800 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
| 801 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
| 802 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), | |
| 803 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
| 804 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
| 805 } | |
| 806 } | |
| 807 } | |
| 808 } | |
| 809 DCHECK_EQ(d, expected_length); | |
| 810 if (error_count > kErrorReportLimit) { | |
| 811 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
| 812 } | |
| 813 DCHECK_EQ(error_count, 0U); | |
| 814 } | |
| 815 delete[] results; | |
| 816 } | |
| 817 | |
| 818 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ==== | |
| 819 | |
| 820 void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, | |
| 821 uintptr_t inputs_d, uintptr_t inputs_n, | |
| 822 unsigned inputs_n_length, uintptr_t inputs_m, | |
| 823 unsigned inputs_m_length, const int indices[], | |
| 824 unsigned indices_length, uintptr_t results, | |
| 825 VectorFormat vd_form, VectorFormat vn_form, | |
| 826 VectorFormat vm_form) { | |
| 827 DCHECK_NE(vd_form, kFormatUndefined); | |
| 828 DCHECK_NE(vn_form, kFormatUndefined); | |
| 829 DCHECK_NE(vm_form, kFormatUndefined); | |
| 830 | |
| 831 SETUP(); | |
| 832 START(); | |
| 833 | |
| 834 // Roll up the loop to keep the code size down. | |
| 835 Label loop_n, loop_m; | |
| 836 | |
| 837 Register out = x0; | |
| 838 Register inputs_n_base = x1; | |
| 839 Register inputs_m_base = x2; | |
| 840 Register inputs_d_base = x3; | |
| 841 Register inputs_n_last_16bytes = x4; | |
| 842 Register inputs_m_last_16bytes = x5; | |
| 843 Register index_n = x6; | |
| 844 Register index_m = x7; | |
| 845 | |
| 846 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
| 847 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 848 | |
| 849 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
| 850 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 851 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 852 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
| 853 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
| 854 | |
| 855 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); | |
| 856 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); | |
| 857 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); | |
| 858 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); | |
| 859 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); | |
| 860 | |
| 861 // Always load and store 128 bits regardless of the format. | |
| 862 VRegister vd = v0.V16B(); | |
| 863 VRegister vn = v1.V16B(); | |
| 864 VRegister vm = v2.V16B(); | |
| 865 VRegister vntmp = v3.V16B(); | |
| 866 VRegister vmtmp = v4.V16B(); | |
| 867 VRegister vres = v5.V16B(); | |
| 868 | |
| 869 // These will have the correct format for calling the 'helper'. | |
| 870 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
| 871 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); | |
| 872 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
| 873 | |
| 874 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
| 875 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
| 876 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); | |
| 877 | |
| 878 __ Mov(out, results); | |
| 879 | |
| 880 __ Mov(inputs_d_base, inputs_d); | |
| 881 | |
| 882 __ Mov(inputs_n_base, inputs_n); | |
| 883 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); | |
| 884 __ Mov(inputs_m_base, inputs_m); | |
| 885 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); | |
| 886 | |
| 887 __ Ldr(vd, MemOperand(inputs_d_base)); | |
| 888 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
| 889 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); | |
| 890 | |
| 891 __ Mov(index_n, 0); | |
| 892 __ Bind(&loop_n); | |
| 893 | |
| 894 __ Ldr(vntmp_single, | |
| 895 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
| 896 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
| 897 | |
| 898 __ Mov(index_m, 0); | |
| 899 __ Bind(&loop_m); | |
| 900 | |
| 901 __ Ldr(vmtmp_single, | |
| 902 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); | |
| 903 __ Ext(vm, vm, vmtmp, vm_lane_bytes); | |
| 904 | |
| 905 __ Mov(vres, vd); | |
| 906 { | |
| 907 for (unsigned i = 0; i < indices_length; i++) { | |
| 908 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]); | |
| 909 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
| 910 } | |
| 911 } | |
| 912 | |
| 913 __ Add(index_m, index_m, 1); | |
| 914 __ Cmp(index_m, inputs_m_length); | |
| 915 __ B(lo, &loop_m); | |
| 916 | |
| 917 __ Add(index_n, index_n, 1); | |
| 918 __ Cmp(index_n, inputs_n_length); | |
| 919 __ B(lo, &loop_n); | |
| 920 | |
| 921 END(); | |
| 922 RUN(); | |
| 923 TEARDOWN(); | |
| 924 } | |
| 925 | |
| 926 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
| 927 // arrays of rawbit representation of input values. This ensures that | |
| 928 // exact bit comparisons can be performed. | |
| 929 template <typename Td, typename Tn, typename Tm> | |
| 930 void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper, | |
| 931 const Td inputs_d[], const Tn inputs_n[], | |
| 932 unsigned inputs_n_length, const Tm inputs_m[], | |
| 933 unsigned inputs_m_length, const int indices[], | |
| 934 unsigned indices_length, const Td expected[], | |
| 935 unsigned expected_length, VectorFormat vd_form, | |
| 936 VectorFormat vn_form, VectorFormat vm_form) { | |
| 937 DCHECK_GT(inputs_n_length, 0U); | |
| 938 DCHECK_GT(inputs_m_length, 0U); | |
| 939 DCHECK_GT(indices_length, 0U); | |
| 940 | |
| 941 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); | |
| 942 | |
| 943 const unsigned results_length = | |
| 944 inputs_n_length * inputs_m_length * indices_length; | |
| 945 Td* results = new Td[results_length * vd_lane_count]; | |
| 946 const unsigned lane_bit = sizeof(Td) * 8; | |
| 947 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>(); | |
| 948 | |
| 949 TestByElementNEON_Helper( | |
| 950 helper, reinterpret_cast<uintptr_t>(inputs_d), | |
| 951 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
| 952 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, indices, | |
| 953 indices_length, reinterpret_cast<uintptr_t>(results), vd_form, vn_form, | |
| 954 vm_form); | |
| 955 | |
| 956 if (CcTest::sim_test_trace()) { | |
| 957 // Print the results. | |
| 958 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
| 959 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
| 960 printf(" "); | |
| 961 // Output a separate result for each element of the result vector. | |
| 962 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 963 unsigned index = lane + (iteration * vd_lane_count); | |
| 964 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
| 965 static_cast<uint64_t>(results[index])); | |
| 966 } | |
| 967 printf("\n"); | |
| 968 } | |
| 969 | |
| 970 printf("};\n"); | |
| 971 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
| 972 results_length); | |
| 973 } else { | |
| 974 // Check the results. | |
| 975 CHECK(expected_length == results_length); | |
| 976 unsigned error_count = 0; | |
| 977 unsigned d = 0; | |
| 978 const char* padding = " "; | |
| 979 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
| 980 for (unsigned n = 0; n < inputs_n_length; n++) { | |
| 981 for (unsigned m = 0; m < inputs_m_length; m++) { | |
| 982 for (unsigned index = 0; index < indices_length; index++, d++) { | |
| 983 bool error_in_vector = false; | |
| 984 | |
| 985 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 986 unsigned output_index = | |
| 987 (n * inputs_m_length * indices_length * vd_lane_count) + | |
| 988 (m * indices_length * vd_lane_count) + (index * vd_lane_count) + | |
| 989 lane; | |
| 990 | |
| 991 if (results[output_index] != expected[output_index]) { | |
| 992 error_in_vector = true; | |
| 993 break; | |
| 994 } | |
| 995 } | |
| 996 | |
| 997 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
| 998 printf("%s\n", name); | |
| 999 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n", | |
| 1000 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, | |
| 1001 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); | |
| 1002 | |
| 1003 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 1004 unsigned output_index = | |
| 1005 (n * inputs_m_length * indices_length * vd_lane_count) + | |
| 1006 (m * indices_length * vd_lane_count) + | |
| 1007 (index * vd_lane_count) + lane; | |
| 1008 unsigned input_index_n = | |
| 1009 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
| 1010 inputs_n_length; | |
| 1011 unsigned input_index_m = | |
| 1012 (inputs_m_length - vd_lane_count + m + 1 + lane) % | |
| 1013 inputs_m_length; | |
| 1014 | |
| 1015 printf( | |
| 1016 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
| 1017 " " | |
| 1018 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
| 1019 results[output_index] != expected[output_index] ? '*' : ' ', | |
| 1020 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
| 1021 lane_len_in_hex, | |
| 1022 static_cast<uint64_t>(inputs_n[input_index_n]), | |
| 1023 lane_len_in_hex, | |
| 1024 static_cast<uint64_t>(inputs_m[input_index_m]), | |
| 1025 indices[index], lane_len_in_hex, | |
| 1026 static_cast<uint64_t>(results[output_index]), lane_len_in_hex, | |
| 1027 static_cast<uint64_t>(expected[output_index])); | |
| 1028 } | |
| 1029 } | |
| 1030 } | |
| 1031 } | |
| 1032 } | |
| 1033 DCHECK_EQ(d, expected_length); | |
| 1034 if (error_count > kErrorReportLimit) { | |
| 1035 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
| 1036 } | |
| 1037 CHECK(error_count == 0); | |
| 1038 } | |
| 1039 delete[] results; | |
| 1040 } | |
| 1041 | |
| 1042 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ==== | |
| 1043 | |
| 1044 template <typename Tm> | |
| 1045 void Test2OpImmNEON_Helper( | |
| 1046 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, | |
| 1047 uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[], | |
| 1048 unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, | |
| 1049 VectorFormat vn_form) { | |
| 1050 DCHECK(vd_form != kFormatUndefined && vn_form != kFormatUndefined); | |
| 1051 | |
| 1052 SETUP(); | |
| 1053 START(); | |
| 1054 | |
| 1055 // Roll up the loop to keep the code size down. | |
| 1056 Label loop_n; | |
| 1057 | |
| 1058 Register out = x0; | |
| 1059 Register inputs_n_base = x1; | |
| 1060 Register inputs_n_last_16bytes = x3; | |
| 1061 Register index_n = x5; | |
| 1062 | |
| 1063 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
| 1064 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 1065 | |
| 1066 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
| 1067 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 1068 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 1069 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
| 1070 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
| 1071 | |
| 1072 // These will be either a D- or a Q-register form, with a single lane | |
| 1073 // (for use in scalar load and store operations). | |
| 1074 VRegister vd = VRegister::Create(0, vd_bits); | |
| 1075 VRegister vn = v1.V16B(); | |
| 1076 VRegister vntmp = v3.V16B(); | |
| 1077 | |
| 1078 // These will have the correct format for use when calling 'helper'. | |
| 1079 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); | |
| 1080 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
| 1081 | |
| 1082 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
| 1083 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
| 1084 | |
| 1085 __ Mov(out, results); | |
| 1086 | |
| 1087 __ Mov(inputs_n_base, inputs_n); | |
| 1088 __ Mov(inputs_n_last_16bytes, | |
| 1089 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); | |
| 1090 | |
| 1091 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
| 1092 | |
| 1093 __ Mov(index_n, 0); | |
| 1094 __ Bind(&loop_n); | |
| 1095 | |
| 1096 __ Ldr(vntmp_single, | |
| 1097 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
| 1098 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
| 1099 | |
| 1100 // Set the destination to zero for tests such as '[r]shrn2'. | |
| 1101 // TODO(all): Setting the destination to values other than zero might be a | |
| 1102 // better test for shift and accumulate instructions (srsra/ssra/usra/ursra). | |
| 1103 __ Movi(vd.V16B(), 0); | |
| 1104 | |
| 1105 { | |
| 1106 for (unsigned i = 0; i < inputs_m_length; i++) { | |
| 1107 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]); | |
| 1108 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
| 1109 } | |
| 1110 } | |
| 1111 | |
| 1112 __ Add(index_n, index_n, 1); | |
| 1113 __ Cmp(index_n, inputs_n_length); | |
| 1114 __ B(lo, &loop_n); | |
| 1115 | |
| 1116 END(); | |
| 1117 RUN(); | |
| 1118 TEARDOWN(); | |
| 1119 } | |
| 1120 | |
| 1121 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
| 1122 // arrays of rawbit representation of input values. This ensures that | |
| 1123 // exact bit comparisons can be performed. | |
| 1124 template <typename Td, typename Tn, typename Tm> | |
| 1125 void Test2OpImmNEON(const char* name, | |
| 1126 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, | |
| 1127 const Tn inputs_n[], unsigned inputs_n_length, | |
| 1128 const Tm inputs_m[], unsigned inputs_m_length, | |
| 1129 const Td expected[], unsigned expected_length, | |
| 1130 VectorFormat vd_form, VectorFormat vn_form) { | |
| 1131 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); | |
| 1132 | |
| 1133 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 1134 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 1135 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 1136 | |
| 1137 const unsigned results_length = inputs_n_length * inputs_m_length; | |
| 1138 Td* results = new Td[results_length * vd_lane_count]; | |
| 1139 const unsigned lane_bit = sizeof(Td) * 8; | |
| 1140 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
| 1141 | |
| 1142 Test2OpImmNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n), | |
| 1143 inputs_n_length, inputs_m, inputs_m_length, | |
| 1144 reinterpret_cast<uintptr_t>(results), vd_form, vn_form); | |
| 1145 | |
| 1146 if (CcTest::sim_test_trace()) { | |
| 1147 // Print the results. | |
| 1148 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
| 1149 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
| 1150 printf(" "); | |
| 1151 // Output a separate result for each element of the result vector. | |
| 1152 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 1153 unsigned index = lane + (iteration * vd_lane_count); | |
| 1154 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
| 1155 static_cast<uint64_t>(results[index])); | |
| 1156 } | |
| 1157 printf("\n"); | |
| 1158 } | |
| 1159 | |
| 1160 printf("};\n"); | |
| 1161 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
| 1162 results_length); | |
| 1163 } else { | |
| 1164 // Check the results. | |
| 1165 CHECK(expected_length == results_length); | |
| 1166 unsigned error_count = 0; | |
| 1167 unsigned d = 0; | |
| 1168 const char* padding = " "; | |
| 1169 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
| 1170 for (unsigned n = 0; n < inputs_n_length; n++) { | |
| 1171 for (unsigned m = 0; m < inputs_m_length; m++, d++) { | |
| 1172 bool error_in_vector = false; | |
| 1173 | |
| 1174 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 1175 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
| 1176 (m * vd_lane_count) + lane; | |
| 1177 | |
| 1178 if (results[output_index] != expected[output_index]) { | |
| 1179 error_in_vector = true; | |
| 1180 break; | |
| 1181 } | |
| 1182 } | |
| 1183 | |
| 1184 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
| 1185 printf("%s\n", name); | |
| 1186 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, | |
| 1187 padding, lane_len_in_hex, padding, lane_len_in_hex + 1, | |
| 1188 padding); | |
| 1189 | |
| 1190 const unsigned first_index_n = | |
| 1191 inputs_n_length - (16 / vn_lane_bytes) + n + 1; | |
| 1192 | |
| 1193 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); | |
| 1194 lane++) { | |
| 1195 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
| 1196 (m * vd_lane_count) + lane; | |
| 1197 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; | |
| 1198 unsigned input_index_m = m; | |
| 1199 | |
| 1200 printf( | |
| 1201 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
| 1202 " " | |
| 1203 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
| 1204 results[output_index] != expected[output_index] ? '*' : ' ', | |
| 1205 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
| 1206 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), | |
| 1207 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
| 1208 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
| 1209 } | |
| 1210 } | |
| 1211 } | |
| 1212 } | |
| 1213 DCHECK_EQ(d, expected_length); | |
| 1214 if (error_count > kErrorReportLimit) { | |
| 1215 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
| 1216 } | |
| 1217 CHECK(error_count == 0); | |
| 1218 } | |
| 1219 delete[] results; | |
| 1220 } | |
| 1221 | |
| 1222 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ==== | |
| 1223 | |
| 1224 void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, | |
| 1225 uintptr_t inputs_d, const int inputs_imm1[], | |
| 1226 unsigned inputs_imm1_length, uintptr_t inputs_n, | |
| 1227 unsigned inputs_n_length, | |
| 1228 const int inputs_imm2[], | |
| 1229 unsigned inputs_imm2_length, uintptr_t results, | |
| 1230 VectorFormat vd_form, VectorFormat vn_form) { | |
| 1231 DCHECK_NE(vd_form, kFormatUndefined); | |
| 1232 DCHECK_NE(vn_form, kFormatUndefined); | |
| 1233 | |
| 1234 SETUP(); | |
| 1235 START(); | |
| 1236 | |
| 1237 // Roll up the loop to keep the code size down. | |
| 1238 Label loop_n; | |
| 1239 | |
| 1240 Register out = x0; | |
| 1241 Register inputs_d_base = x1; | |
| 1242 Register inputs_n_base = x2; | |
| 1243 Register inputs_n_last_vector = x4; | |
| 1244 Register index_n = x6; | |
| 1245 | |
| 1246 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
| 1247 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 1248 | |
| 1249 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
| 1250 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
| 1251 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
| 1252 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
| 1253 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
| 1254 | |
| 1255 // These will be either a D- or a Q-register form, with a single lane | |
| 1256 // (for use in scalar load and store operations). | |
| 1257 VRegister vd = VRegister::Create(0, vd_bits); | |
| 1258 VRegister vn = VRegister::Create(1, vn_bits); | |
| 1259 VRegister vntmp = VRegister::Create(4, vn_bits); | |
| 1260 VRegister vres = VRegister::Create(5, vn_bits); | |
| 1261 | |
| 1262 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
| 1263 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
| 1264 | |
| 1265 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
| 1266 VRegister vntmp_single = VRegister::Create(4, vn_lane_bits); | |
| 1267 | |
| 1268 // Same registers for use in the 'ext' instructions. | |
| 1269 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B(); | |
| 1270 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B(); | |
| 1271 | |
| 1272 __ Mov(out, results); | |
| 1273 | |
| 1274 __ Mov(inputs_d_base, inputs_d); | |
| 1275 | |
| 1276 __ Mov(inputs_n_base, inputs_n); | |
| 1277 __ Mov(inputs_n_last_vector, | |
| 1278 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); | |
| 1279 | |
| 1280 __ Ldr(vd, MemOperand(inputs_d_base)); | |
| 1281 | |
| 1282 __ Ldr(vn, MemOperand(inputs_n_last_vector)); | |
| 1283 | |
| 1284 __ Mov(index_n, 0); | |
| 1285 __ Bind(&loop_n); | |
| 1286 | |
| 1287 __ Ldr(vntmp_single, | |
| 1288 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
| 1289 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); | |
| 1290 | |
| 1291 for (unsigned i = 0; i < inputs_imm1_length; i++) { | |
| 1292 for (unsigned j = 0; j < inputs_imm2_length; j++) { | |
| 1293 __ Mov(vres, vd); | |
| 1294 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]); | |
| 1295 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
| 1296 } | |
| 1297 } | |
| 1298 | |
| 1299 __ Add(index_n, index_n, 1); | |
| 1300 __ Cmp(index_n, inputs_n_length); | |
| 1301 __ B(lo, &loop_n); | |
| 1302 | |
| 1303 END(); | |
| 1304 RUN(); | |
| 1305 TEARDOWN(); | |
| 1306 } | |
| 1307 | |
| 1308 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
| 1309 // arrays of rawbit representation of input values. This ensures that | |
| 1310 // exact bit comparisons can be performed. | |
| 1311 template <typename Td, typename Tn> | |
| 1312 void TestOpImmOpImmNEON(const char* name, | |
| 1313 TestOpImmOpImmVdUpdateNEONHelper_t helper, | |
| 1314 const Td inputs_d[], const int inputs_imm1[], | |
| 1315 unsigned inputs_imm1_length, const Tn inputs_n[], | |
| 1316 unsigned inputs_n_length, const int inputs_imm2[], | |
| 1317 unsigned inputs_imm2_length, const Td expected[], | |
| 1318 unsigned expected_length, VectorFormat vd_form, | |
| 1319 VectorFormat vn_form) { | |
| 1320 DCHECK_GT(inputs_n_length, 0U); | |
| 1321 DCHECK_GT(inputs_imm1_length, 0U); | |
| 1322 DCHECK_GT(inputs_imm2_length, 0U); | |
| 1323 | |
| 1324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
| 1325 | |
| 1326 const unsigned results_length = | |
| 1327 inputs_n_length * inputs_imm1_length * inputs_imm2_length; | |
| 1328 | |
| 1329 Td* results = new Td[results_length * vd_lane_count]; | |
| 1330 const unsigned lane_bit = sizeof(Td) * 8; | |
| 1331 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
| 1332 | |
| 1333 TestOpImmOpImmNEON_Helper( | |
| 1334 helper, reinterpret_cast<uintptr_t>(inputs_d), inputs_imm1, | |
| 1335 inputs_imm1_length, reinterpret_cast<uintptr_t>(inputs_n), | |
| 1336 inputs_n_length, inputs_imm2, inputs_imm2_length, | |
| 1337 reinterpret_cast<uintptr_t>(results), vd_form, vn_form); | |
| 1338 | |
| 1339 if (CcTest::sim_test_trace()) { | |
| 1340 // Print the results. | |
| 1341 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
| 1342 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
| 1343 printf(" "); | |
| 1344 // Output a separate result for each element of the result vector. | |
| 1345 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 1346 unsigned index = lane + (iteration * vd_lane_count); | |
| 1347 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
| 1348 static_cast<uint64_t>(results[index])); | |
| 1349 } | |
| 1350 printf("\n"); | |
| 1351 } | |
| 1352 | |
| 1353 printf("};\n"); | |
| 1354 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
| 1355 results_length); | |
| 1356 } else { | |
| 1357 // Check the results. | |
| 1358 CHECK(expected_length == results_length); | |
| 1359 unsigned error_count = 0; | |
| 1360 unsigned counted_length = 0; | |
| 1361 const char* padding = " "; | |
| 1362 DCHECK(strlen(padding) >= (lane_len_in_hex + 1)); | |
| 1363 for (unsigned n = 0; n < inputs_n_length; n++) { | |
| 1364 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) { | |
| 1365 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) { | |
| 1366 bool error_in_vector = false; | |
| 1367 | |
| 1368 counted_length++; | |
| 1369 | |
| 1370 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 1371 unsigned output_index = | |
| 1372 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + | |
| 1373 (imm1 * inputs_imm2_length * vd_lane_count) + | |
| 1374 (imm2 * vd_lane_count) + lane; | |
| 1375 | |
| 1376 if (results[output_index] != expected[output_index]) { | |
| 1377 error_in_vector = true; | |
| 1378 break; | |
| 1379 } | |
| 1380 } | |
| 1381 | |
| 1382 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
| 1383 printf("%s\n", name); | |
| 1384 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", | |
| 1385 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, | |
| 1386 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, | |
| 1387 lane_len_in_hex + 1, padding); | |
| 1388 | |
| 1389 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
| 1390 unsigned output_index = | |
| 1391 (n * inputs_imm1_length * inputs_imm2_length * | |
| 1392 vd_lane_count) + | |
| 1393 (imm1 * inputs_imm2_length * vd_lane_count) + | |
| 1394 (imm2 * vd_lane_count) + lane; | |
| 1395 unsigned input_index_n = | |
| 1396 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
| 1397 inputs_n_length; | |
| 1398 unsigned input_index_imm1 = imm1; | |
| 1399 unsigned input_index_imm2 = imm2; | |
| 1400 | |
| 1401 printf( | |
| 1402 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
| 1403 " " | |
| 1404 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
| 1405 results[output_index] != expected[output_index] ? '*' : ' ', | |
| 1406 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
| 1407 lane_len_in_hex, | |
| 1408 static_cast<uint64_t>(inputs_imm1[input_index_imm1]), | |
| 1409 lane_len_in_hex, | |
| 1410 static_cast<uint64_t>(inputs_n[input_index_n]), | |
| 1411 lane_len_in_hex, | |
| 1412 static_cast<uint64_t>(inputs_imm2[input_index_imm2]), | |
| 1413 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
| 1414 lane_len_in_hex, | |
| 1415 static_cast<uint64_t>(expected[output_index])); | |
| 1416 } | |
| 1417 } | |
| 1418 } | |
| 1419 } | |
| 1420 } | |
| 1421 DCHECK_EQ(counted_length, expected_length); | |
| 1422 if (error_count > kErrorReportLimit) { | |
| 1423 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
| 1424 } | |
| 1425 CHECK(error_count == 0); | |
| 1426 } | |
| 1427 delete[] results; | |
| 1428 } | |
| 1429 | |
| 1430 } // anonymous namespace | |
| 1431 | |
| 1432 // ==== NEON Tests. ==== | |
| 1433 | |
// Calls Test1OpNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>", the input length is the element count of 'in_n', and the
// expected data comes from kExpected_NEON_<insn>_<vd_fmt> and
// kExpectedCount_NEON_<insn>_<vd_fmt>.
#define CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, in_n)      \
  Test1OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt),               \
              &MacroAssembler::insn, in_n,                         \
              (sizeof(in_n) / sizeof(in_n[0])),                    \
              kExpected_NEON_##insn##_##vd_fmt,                    \
              kExpectedCount_NEON_##insn##_##vd_fmt,               \
              kFormat##vd_fmt, kFormat##vn_fmt)
| 1441 | |
// Calls Test1OpAcrossNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>_<vn_fmt>", the input length is the element count of
// 'in_n', and the expected data comes from
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt> and the matching kExpectedCount_.
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, in_n)        \
  Test1OpAcrossNEON(                                                       \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt),         \
      &MacroAssembler::insn, in_n,                                         \
      (sizeof(in_n) / sizeof(in_n[0])),                                    \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt,                         \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt,                    \
      kFormat##vd_fmt, kFormat##vn_fmt)
| 1450 | |
// Calls Test2OpNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>", the input lengths are the element counts of 'in_n' and
// 'in_m', and the expected data comes from kExpected_NEON_<insn>_<vd_fmt> and
// kExpectedCount_NEON_<insn>_<vd_fmt>.
#define CALL_TEST_NEON_HELPER_2Op(insn, vd_fmt, vn_fmt, vm_fmt, in_d, in_n, \
                                  in_m)                                     \
  Test2OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt),                        \
              &MacroAssembler::insn, in_d, in_n,                            \
              (sizeof(in_n) / sizeof(in_n[0])), in_m,                       \
              (sizeof(in_m) / sizeof(in_m[0])),                             \
              kExpected_NEON_##insn##_##vd_fmt,                             \
              kExpectedCount_NEON_##insn##_##vd_fmt,                        \
              kFormat##vd_fmt, kFormat##vn_fmt, kFormat##vm_fmt)
| 1460 | |
// Calls Test2OpImmNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>_2OPIMM", the input lengths are the element counts of
// 'in_n' and 'in_m', and the expected data comes from
// kExpected_NEON_<insn>_<vd_fmt>_2OPIMM and the matching kExpectedCount_.
#define CALL_TEST_NEON_HELPER_2OpImm(insn, vd_fmt, vn_fmt, in_n, in_m)   \
  Test2OpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_2OPIMM",        \
                 &MacroAssembler::insn, in_n,                            \
                 (sizeof(in_n) / sizeof(in_n[0])), in_m,                 \
                 (sizeof(in_m) / sizeof(in_m[0])),                       \
                 kExpected_NEON_##insn##_##vd_fmt##_2OPIMM,              \
                 kExpectedCount_NEON_##insn##_##vd_fmt##_2OPIMM,         \
                 kFormat##vd_fmt, kFormat##vn_fmt)
| 1470 | |
// Calls TestByElementNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt>", the lengths are the element counts of
// 'in_n', 'in_m' and 'lane_indices', and the expected data comes from
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt> and the matching
// kExpectedCount_.
#define CALL_TEST_NEON_HELPER_ByElement(insn, vd_fmt, vn_fmt, vm_fmt, in_d, \
                                        in_n, in_m, lane_indices)           \
  TestByElementNEON(                                                        \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt) "_"       \
      STRINGIFY(vm_fmt),                                                    \
      &MacroAssembler::insn, in_d, in_n,                                    \
      (sizeof(in_n) / sizeof(in_n[0])), in_m,                               \
      (sizeof(in_m) / sizeof(in_m[0])), lane_indices,                       \
      (sizeof(lane_indices) / sizeof(lane_indices[0])),                     \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt,               \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt,          \
      kFormat##vd_fmt, kFormat##vn_fmt, kFormat##vm_fmt)
| 1483 | |
// Runs TestOpImmOpImmNEON. Unlike the other CALL_* helpers, the callable is
// supplied by the caller as `helper`; `mnemonic` is only used to build the
// test name and the expected-data identifiers ("<mnemonic>_<vdform>").
// `input_imm1`, `input_n` and `input_imm2` are passed with sizeof-derived
// element counts and must be real arrays.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, mnemonic, vdform, vnform, \
                                         input_d, input_imm1, input_n, \
                                         input_imm2) \
  TestOpImmOpImmNEON( \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
      helper, \
      input_d, \
      input_imm1, (sizeof(input_imm1) / sizeof(input_imm1[0])), \
      input_n, (sizeof(input_n) / sizeof(input_n[0])), \
      input_imm2, (sizeof(input_imm2) / sizeof(input_imm2[0])), \
      kExpected_NEON_##mnemonic##_##vdform, \
      kExpectedCount_NEON_##mnemonic##_##vdform, \
      kFormat##vdform, kFormat##vnform)
| 1495 | |
// One-source test whose destination and source share a single format:
// forwards to CALL_TEST_NEON_HELPER_1Op with `variant` used for both.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)
| 1498 | |
// Defines SIMTESTs <mnemonic>_8B and <mnemonic>_16B; both read
// kInput8bits<input>.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
  SIMTEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
  }
| 1506 | |
// Defines SIMTESTs <mnemonic>_4H and <mnemonic>_8H; both read
// kInput16bits<input>.
#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \
  SIMTEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
  }
| 1514 | |
// Defines SIMTESTs <mnemonic>_2S and <mnemonic>_4S; both read
// kInput32bits<input>.
#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
  }
| 1522 | |
// Byte and halfword 2SAME tests: 8B, 16B, 4H and 8H.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)
| 1526 | |
// Every 2SAME vector format except 2D: 8B, 16B, 4H, 8H, 2S and 4S.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
| 1530 | |
// All 2SAME vector formats: the NO2D set plus <mnemonic>_2D, which reads
// kInput64bits<input>.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }
// Word and doubleword 2SAME tests only: 2S and 4S (kInput32bits<input>) plus
// 2D (kInput64bits<input>).
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
  }
| 1541 | |
// Floating-point 2SAME tests: 2S and 4S read kInputFloat<input>, 2D reads
// kInputDouble<input>.
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
  }
| 1552 | |
// Scalar floating-point 2SAME tests: S reads kInputFloat<input>, D reads
// kInputDouble<input>.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input) \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \
  }
| 1560 | |
// Scalar B-format 2SAME test, reading kInput8bits<input>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
  SIMTEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
  }
// Scalar H-format 2SAME test, reading kInput16bits<input>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
  SIMTEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
  }
// Scalar S-format 2SAME test, reading kInput32bits<input>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
  }
// Scalar D-format 2SAME test, reading kInput64bits<input>.
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
  }
| 1577 | |
// All four scalar 2SAME tests: B, H, S and D.
#define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
| 1583 | |
// Scalar 2SAME tests restricted to the S and D formats.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
| 1587 | |
// Thin alias: forwards all four arguments unchanged to
// CALL_TEST_NEON_HELPER_1OpAcross.
#define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
| 1590 | |
// Defines ACROSS tests whose scalar destination has the same lane size as the
// vector source: B<-8B, B<-16B, H<-4H, H<-8H and S<-4S. Inputs come from
// kInput{8,16,32}bits<input>, matching the source lane size.
#define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \
  SIMTEST(mnemonic##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
  }
| 1607 | |
// Defines ACROSS tests whose scalar destination is one size step wider than
// the source lanes: H<-8B, H<-16B, S<-4H, S<-8H and D<-4S. Inputs are chosen
// by source lane size.
#define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \
  SIMTEST(mnemonic##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
  }
| 1624 | |
// Floating-point ACROSS test: only S<-4S, reading kInputFloat<input>.
#define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \
  SIMTEST(mnemonic##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \
  }
| 1629 | |
// One-source test where destination (`vdform`) and source (`vnform`) formats
// may differ: forwards unchanged to CALL_TEST_NEON_HELPER_1Op.
#define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
| 1632 | |
// 2DIFF tests with half as many, double-width destination lanes: 4H<-8B,
// 8H<-16B, 2S<-4H, 4S<-8H, 1D<-2S and 2D<-4S. Tests are named after the
// destination format; inputs are chosen by source lane size.
#define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \
  SIMTEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
  }
| 1652 | |
// 2DIFF tests whose destination lanes are half the source lane width. Covers
// <mnemonic> (8B<-8H, 4H<-4S, 2S<-2D) and <mnemonic>2 (16B<-8H, 8H<-4S,
// 4S<-2D). Inputs are chosen by source lane size.
#define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
  SIMTEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
  } \
  SIMTEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
  }
| 1672 | |
// Floating-point 2DIFF tests with a wider destination, for <mnemonic>
// (4S<-4H, 2D<-2S) and <mnemonic>2 (4S<-8H, 2D<-4S). H-format sources read
// kInputFloat16<input>; S-format sources read kInputFloat<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
  } \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
  } \
  SIMTEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
  }
| 1686 | |
// Floating-point 2DIFF tests with a narrower destination, for <mnemonic>
// (4H<-4S, 2S<-2D) and <mnemonic>2 (8H<-4S, 4S<-2D). S-format sources read
// kInputFloat<input>; D-format sources read kInputDouble<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \
  SIMTEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
| 1700 | |
// Double-to-single subset of the FP narrowing tests: <mnemonic> 2S<-2D and
// <mnemonic>2 4S<-2D, both reading kInputDouble<input>.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
  }
| 1708 | |
// Scalar narrowing 2DIFF tests: B<-H, H<-S and S<-D. Inputs are chosen by
// source size (kInput{16,32,64}bits<input>).
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \
  SIMTEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
  }
| 1719 | |
// Floating-point tests with a scalar destination and a two-lane vector
// source: S<-2S (kInputFloat<input>) and D<-2D (kInputDouble<input>).
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
  }
| 1727 | |
// Two-source test in which destination and both sources share `variant`, and
// both sources read the same array `input_nm`. Expands to a braced block
// around CALL_TEST_NEON_HELPER_2Op.
#define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, variant, variant, variant, input_d, input_nm, input_nm); \
  }
| 1733 | |
// 3SAME tests for 8B and 16B. The destination input is
// kInput8bitsAccDestination; both sources read kInput8bits<input>.
#define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  SIMTEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 8B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_16B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 16B, kInput8bitsAccDestination, kInput8bits##input); \
  }
| 1743 | |
// 3SAME tests for the halfword and word vector formats: 4H, 8H, 2S and 4S.
// Destination and source arrays are chosen by lane size.
#define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
  SIMTEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 8H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInput32bitsAccDestination, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInput32bitsAccDestination, kInput32bits##input); \
  }
| 1761 | |
// Every 3SAME vector format except 2D: 8B, 16B, 4H, 8H, 2S and 4S.
#define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
| 1765 | |
// All 3SAME vector formats: the NO2D set plus <mnemonic>_2D, which uses
// kInput64bitsAccDestination and kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInput64bitsAccDestination, kInput64bits##input); \
  }
| 1772 | |
// Floating-point 3SAME tests. 2S and 4S use kInputFloatAccDestination with
// kInputFloat<input>; 2D uses kInputDoubleAccDestination with
// kInputDouble<input>.
#define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 4S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, 2D, kInputDoubleAccDestination, kInputDouble##input); \
  }
| 1786 | |
// Scalar D-format 3SAME test, using kInput64bitsAccDestination and
// kInput64bits<input>.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
  }
| 1792 | |
// Scalar H- and S-format 3SAME tests; arrays are chosen by operand size.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  SIMTEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
  }
| 1802 | |
// Scalar 3SAME tests for all four sizes: B, H, S and D. Each uses the
// kInput<N>bitsAccDestination / kInput<N>bits<input> pair of matching width.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
  SIMTEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, H, kInput16bitsAccDestination, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInput32bitsAccDestination, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInput64bitsAccDestination, kInput64bits##input); \
  }
| 1820 | |
// Scalar floating-point 3SAME tests: S uses the kInputFloat* arrays, D uses
// the kInputDouble* arrays.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
  }
| 1830 | |
// Two-source test in which the three operand formats may all differ. Expands
// to a braced block that forwards every argument unchanged to
// CALL_TEST_NEON_HELPER_2Op.
#define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, vdform, vnform, vmform, input_d, \
                                    input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }
| 1837 | |
// 3DIFF tests with an 8H destination: <mnemonic> reads 8B sources and
// <mnemonic>2 reads 16B sources. Both use kInput16bitsAccDestination and
// kInput8bits<input> (for both sources).
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  SIMTEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8B, 8B, kInput16bitsAccDestination, \
        kInput8bits##input, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 16B, 16B, kInput16bitsAccDestination, \
        kInput8bits##input, kInput8bits##input); \
  }
| 1849 | |
// 3DIFF tests with a 4S destination: <mnemonic> reads 4H sources and
// <mnemonic>2 reads 8H sources. Both use kInput32bitsAccDestination and
// kInput16bits<input> (for both sources).
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4H, 4H, kInput32bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 8H, 8H, kInput32bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  }
| 1861 | |
// 3DIFF tests with a 2D destination: <mnemonic> reads 2S sources and
// <mnemonic>2 reads 4S sources. Both use kInput64bitsAccDestination and
// kInput32bits<input> (for both sources).
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2S, 2S, kInput64bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 4S, 4S, kInput64bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  }
| 1873 | |
// 3DIFF long tests restricted to the 4S and 2D destinations.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
| 1877 | |
// All 3DIFF long tests: 8H, 4S and 2D destinations.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
| 1882 | |
// Scalar 3DIFF test S<-H,H using kInput32bitsAccDestination and
// kInput16bits<input> for both sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, S, H, H, kInput32bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  }
| 1888 | |
// Scalar 3DIFF test D<-S,S using kInput64bitsAccDestination and
// kInput32bits<input> for both sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, D, S, S, kInput64bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  }
| 1894 | |
// Both scalar long 3DIFF tests: S and D destinations.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
| 1898 | |
// 3DIFF tests where the destination and first source share a wide format and
// the second source has lanes of half that width. <mnemonic> pairs 8H/8B,
// 4S/4H and 2D/2S; <mnemonic>2 pairs 8H/16B, 4S/8H and 2D/4S. The first source
// reads the wide input array, the second the narrow one.
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  SIMTEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8H, 8B, kInput16bitsAccDestination, \
        kInput16bits##input, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4S, 4H, kInput32bitsAccDestination, \
        kInput32bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2D, 2S, kInput64bitsAccDestination, \
        kInput64bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 8H, 16B, kInput16bitsAccDestination, \
        kInput16bits##input, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 4S, 8H, kInput32bitsAccDestination, \
        kInput32bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 2D, 4S, kInput64bitsAccDestination, \
        kInput64bits##input, kInput32bits##input); \
  }
| 1930 | |
// 3DIFF tests whose destination lanes are half the width of both sources.
// <mnemonic> covers 8B<-8H, 4H<-4S and 2S<-2D; <mnemonic>2 covers 16B<-8H,
// 8H<-4S and 4S<-2D. The destination array matches the destination lane size;
// both sources read the same wide input array.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  SIMTEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8B, 8H, 8H, kInput8bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4H, 4S, 4S, kInput16bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2S, 2D, 2D, kInput32bitsAccDestination, \
        kInput64bits##input, kInput64bits##input); \
  } \
  SIMTEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 16B, 8H, 8H, kInput8bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 4S, 4S, kInput16bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 2D, 2D, kInput32bitsAccDestination, \
        kInput64bits##input, kInput64bits##input); \
  }
| 1962 | |
// Braced-block wrapper around CALL_TEST_NEON_HELPER_2OpImm; all arguments are
// forwarded unchanged. Because the expansion is a `{ ... }` block, a trailing
// semicolon at the call site is optional.
#define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, vdform, vnform, input_n, \
                                     input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }
| 1969 | |
// 2OPIMM tests for every vector format, with destination and source sharing
// the format: 8B, 16B, 4H, 8H, 2S, 4S and 2D. The source array and immediate
// array (kInput<N>bitsImm<input_imm>) are chosen by lane size.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
| 1999 | |
// 2OPIMM tests with a vector destination and a scalar-format source of the
// same lane size: 8B/16B<-B, 4H/8H<-H, 2S/4S<-S and 2D<-D. Tests are named
// after the destination format.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
| 2029 | |
// Narrowing 2OPIMM tests for <mnemonic> (8B<-8H, 4H<-4S, 2S<-2D) and
// <mnemonic>2 (16B<-8H, 8H<-4S, 4S<-2D). The source array is chosen by source
// lane size; the immediate array is chosen by destination lane size.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 16B, 8H, kInput16bits##input, \
        kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 4S, kInput32bits##input, \
        kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 2D, kInput64bits##input, \
        kInput32bitsImm##input_imm); \
  }
| 2055 | |
// Scalar narrowing 2OPIMM tests: B<-H, H<-S and S<-D. The source array is
// chosen by source size; the immediate array by destination size.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }
| 2069 | |
// Floating-point 2OPIMM tests for 2S, 4S and 2D; every variant takes its
// immediates from kInputDoubleImm<input_imm>.
//
// NOTE(review): the 2S test always reads kInputFloatBasic and ignores `input`,
// whereas the 4S and 2D tests honour it. This looks unintentional, but the
// recorded kExpected_* data presumably matches the inputs as written, so
// confirm against the traces before changing it.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInputDoubleImm##input_imm) \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }
| 2083 | |
// Floating-point 2OPIMM tests for 2S, 4S and 2D with integer immediates:
// kInput32bitsImm<input_imm> for 2S/4S and kInput64bitsImm<input_imm> for 2D.
//
// NOTE(review): the 2S test always reads kInputFloatBasic and ignores `input`,
// whereas the 4S and 2D tests honour it. This looks unintentional, but the
// recorded kExpected_* data presumably matches the inputs as written, so
// confirm against the traces before changing it.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInput32bitsImm##input_imm) \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInput32bitsImm##input_imm) \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInput64bitsImm##input_imm) \
  }
| 2097 | |
// Scalar floating-point 2OPIMM tests with integer immediates: S uses
// kInput32bitsImm<input_imm>, D uses kInput64bitsImm<input_imm>.
//
// NOTE(review): the S test always reads kInputFloatBasic and ignores `input`,
// whereas the D test reads kInputDouble<input>. This looks unintentional, but
// the recorded kExpected_* data presumably matches the inputs as written, so
// confirm against the traces before changing it.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloatBasic, kInput32bitsImm##input_imm) \
  } \
  SIMTEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInput64bitsImm##input_imm) \
  }
| 2107 | |
// Integer 2OPIMM tests restricted to the word and doubleword vector formats:
// 2S, 4S and 2D.
#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
| 2121 | |
// Scalar D-format integer 2OPIMM test, using kInput64bits<input> and
// kInput64bitsImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }
| 2127 | |
// Scalar integer 2OPIMM tests for S and D; the D test comes from
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
| 2134 | |
// Scalar D-format floating-point 2OPIMM test, using kInputDouble<input> and
// kInputDoubleImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }
| 2140 | |
// Scalar floating-point 2OPIMM tests for S and D. The S test reads
// kInputFloat<input> with kInputDoubleImm<input_imm>; the D test comes from
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
| 2147 | |
// Scalar integer 2OPIMM tests for all four sizes: B and H are defined here,
// S and D come from DEFINE_TEST_NEON_2OPIMM_SCALAR_SD.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
| 2158 | |
// Widening 2OPIMM tests for <mnemonic> (8H<-8B, 4S<-4H, 2D<-2S) and
// <mnemonic>2 (8H<-16B, 4S<-8H, 2D<-4S). Both the source array and the
// immediate array are chosen by source lane size.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 16B, kInput8bits##input, \
        kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 8H, kInput16bits##input, \
        kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 2D, 4S, kInput32bits##input, \
        kInput32bitsImm##input_imm); \
  }
| 2184 | |
// Braced-block wrapper around CALL_TEST_NEON_HELPER_ByElement; all arguments
// are forwarded unchanged.
#define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, vdform, vnform, vmform, \
                                        input_d, input_n, input_m, indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, \
        indices); \
  }
| 2191 | |
// Integer by-element tests: 4H/8H vectors with an H element operand (indices
// from kInputHIndices) and 2S/4S vectors with an S element operand (indices
// from kInputSIndices). `input_d`, `input_n` and `input_m` are suffixes
// appended to kInput16bits / kInput32bits to pick the three arrays.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  SIMTEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4H, 4H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  SIMTEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 8H, 8H, H, \
        kInput16bits##input_d, kInput16bits##input_n, kInput16bits##input_m, \
        kInputHIndices); \
  } \
  SIMTEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  } \
  SIMTEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4S, S, \
        kInput32bits##input_d, kInput32bits##input_n, kInput32bits##input_m, \
        kInputSIndices); \
  }
| 2213 | |
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT. Defines two SIMTEST cases:
//   mnemonic_H_H_H - kInput16bits tables, kInputHIndices
//   mnemonic_S_S_S - kInput32bits tables, kInputSIndices
| 2214 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \ | |
| 2215 SIMTEST(mnemonic##_H_H_H) { \ | |
| 2216 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInput16bits##input_d, \ | |
| 2217 kInput16bits##input_n, \ | |
| 2218 kInput16bits##input_m, kInputHIndices); \ | |
| 2219 } \ | |
| 2220 SIMTEST(mnemonic##_S_S_S) { \ | |
| 2221 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInput32bits##input_d, \ | |
| 2222 kInput32bits##input_n, \ | |
| 2223 kInput32bits##input_m, kInputSIndices); \ | |
| 2224 } | |
| 2225 | |
// Floating-point by-element tests. Defines three SIMTEST cases:
//   mnemonic_2S_2S_S, mnemonic_4S_4S_S - kInputFloat tables, kInputSIndices
//   mnemonic_2D_2D_D                   - kInputDouble tables, kInputDIndices
// The _2S_2S_S and _4S_4S_S names are the same ones DEFINE_TEST_NEON_BYELEMENT
// generates, so the two macros should not be instantiated for one mnemonic
// (presumably the SIMTEST names would collide).
| 2226 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ | |
| 2227 SIMTEST(mnemonic##_2S_2S_S) { \ | |
| 2228 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInputFloat##input_d, \ | |
| 2229 kInputFloat##input_n, \ | |
| 2230 kInputFloat##input_m, kInputSIndices); \ | |
| 2231 } \ | |
| 2232 SIMTEST(mnemonic##_4S_4S_S) { \ | |
| 2233 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInputFloat##input_d, \ | |
| 2234 kInputFloat##input_n, \ | |
| 2235 kInputFloat##input_m, kInputSIndices); \ | |
| 2236 } \ | |
| 2237 SIMTEST(mnemonic##_2D_2D_D) { \ | |
| 2238 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
| 2239 mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, \ | |
| 2240 kInputDouble##input_m, kInputDIndices); \ | |
| 2241 } | |
| 2242 | |
// Scalar floating-point by-element tests. Defines two SIMTEST cases:
//   mnemonic_S_S_S - kInputFloat tables, kInputSIndices
//   mnemonic_D_D_D - kInputDouble tables, kInputDIndices
// The parameters are abbreviated to inp_d/inp_n/inp_m here; they play the same
// role as input_d/input_n/input_m in the sibling macros.
| 2243 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ | |
| 2244 SIMTEST(mnemonic##_S_S_S) { \ | |
| 2245 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInputFloat##inp_d, \ | |
| 2246 kInputFloat##inp_n, kInputFloat##inp_m, \ | |
| 2247 kInputSIndices); \ | |
| 2248 } \ | |
| 2249 SIMTEST(mnemonic##_D_D_D) { \ | |
| 2250 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, kInputDouble##inp_d, \ | |
| 2251 kInputDouble##inp_n, kInputDouble##inp_m, \ | |
| 2252 kInputDIndices); \ | |
| 2253 } | |
| 2254 | |
// By-element tests where the d operand is twice as wide per lane as n and m.
// Defines four SIMTEST cases; the two "2" cases test mnemonic##2 on the
// full-width n arrangement:
//   mnemonic_4S_4H_H,  mnemonic2_4S_8H_H - d: kInput32bits, n/m: kInput16bits,
//                                          kInputHIndices
//   mnemonic_2D_2S_S,  mnemonic2_2D_4S_S - d: kInput64bits, n/m: kInput32bits,
//                                          kInputSIndices
| 2255 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ | |
| 2256 SIMTEST(mnemonic##_4S_4H_H) { \ | |
| 2257 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
| 2258 mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, \ | |
| 2259 kInput16bits##input_m, kInputHIndices); \ | |
| 2260 } \ | |
| 2261 SIMTEST(mnemonic##2_4S_8H_H) { \ | |
| 2262 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
| 2263 mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, \ | |
| 2264 kInput16bits##input_m, kInputHIndices); \ | |
| 2265 } \ | |
| 2266 SIMTEST(mnemonic##_2D_2S_S) { \ | |
| 2267 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
| 2268 mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, \ | |
| 2269 kInput32bits##input_m, kInputSIndices); \ | |
| 2270 } \ | |
| 2271 SIMTEST(mnemonic##2_2D_4S_S) { \ | |
| 2272 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
| 2273 mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, \ | |
| 2274 kInput32bits##input_m, kInputSIndices); \ | |
| 2275 } | |
| 2276 | |
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT_DIFF. Defines two SIMTEST
// cases in which the d operand is twice as wide as n and m:
//   mnemonic_S_H_H - d: kInput32bits, n/m: kInput16bits, kInputHIndices
//   mnemonic_D_S_S - d: kInput64bits, n/m: kInput32bits, kInputSIndices
| 2277 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, \ | |
| 2278 input_m) \ | |
| 2279 SIMTEST(mnemonic##_S_H_H) { \ | |
| 2280 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, kInput32bits##input_d, \ | |
| 2281 kInput16bits##input_n, \ | |
| 2282 kInput16bits##input_m, kInputHIndices); \ | |
| 2283 } \ | |
| 2284 SIMTEST(mnemonic##_D_S_S) { \ | |
| 2285 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, kInput64bits##input_d, \ | |
| 2286 kInput32bits##input_n, \ | |
| 2287 kInput32bits##input_m, kInputSIndices); \ | |
| 2288 } | |
| 2289 | |
// Wrapper for instructions taking two operands that each carry an immediate.
// Forwards to CALL_TEST_NEON_HELPER_OpImmOpImm with:
//   - &MacroAssembler::mnemonic, a pointer to the MacroAssembler member
//     function named by `mnemonic`;
//   - `mnemonic` itself;
//   - `variant` twice, so both arrangement arguments of the helper are equal;
//   - the four input tables in the order d, imm1, n, imm2.
// NOTE(review): CALL_TEST_NEON_HELPER_OpImmOpImm is defined outside this chunk;
// how it uses each argument should be confirmed there.
| 2290 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant, input_d, input_imm1, \ | |
| 2291 input_n, input_imm2) \ | |
| 2292 { \ | |
| 2293 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, \ | |
| 2294 variant, variant, input_d, input_imm1, \ | |
| 2295 input_n, input_imm2); \ | |
| 2296 } | |
| 2297 | |
// Defines four SIMTEST cases, one per lane size, for an instruction taking two
// operands that each carry an immediate:
//   mnemonic_B - variant 16B, kInput8bits  / kInput8bitsImm  tables
//   mnemonic_H - variant 8H,  kInput16bits / kInput16bitsImm tables
//   mnemonic_S - variant 4S,  kInput32bits / kInput32bitsImm tables
//   mnemonic_D - variant 2D,  kInput64bits / kInput64bitsImm tables
// input_d/input_n are pasted onto the kInputNNbits prefix and
// input_imm1/input_imm2 onto the kInputNNbitsImm prefix.
| 2298 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, \ | |
| 2299 input_imm2) \ | |
| 2300 SIMTEST(mnemonic##_B) { \ | |
| 2301 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
| 2302 mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, \ | |
| 2303 kInput8bits##input_n, kInput8bitsImm##input_imm2); \ | |
| 2304 } \ | |
| 2305 SIMTEST(mnemonic##_H) { \ | |
| 2306 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
| 2307 mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, \ | |
| 2308 kInput16bits##input_n, kInput16bitsImm##input_imm2); \ | |
| 2309 } \ | |
| 2310 SIMTEST(mnemonic##_S) { \ | |
| 2311 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
| 2312 mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, \ | |
| 2313 kInput32bits##input_n, kInput32bitsImm##input_imm2); \ | |
| 2314 } \ | |
| 2315 SIMTEST(mnemonic##_D) { \ | |
| 2316 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
| 2317 mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, \ | |
| 2318 kInput64bits##input_n, kInput64bitsImm##input_imm2); \ | |
| 2319 } | |
| 2320 | |
| 2321 // Advanced SIMD copy. | |
// ins: DEFINE_TEST_NEON_2OP2IMM generates ins_B, ins_H, ins_S and ins_D, using
// the Basic input tables and the LaneCountFromZero immediate tables for both
// the input_d and input_n operands.
// dup: DEFINE_TEST_NEON_2OPIMM_COPY is defined outside this chunk.
| 2322 DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic, | |
| 2323 LaneCountFromZero) | |
| 2324 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) | |
| 2325 | |
| 2326 // Advanced SIMD scalar copy. | |
// Scalar dup reuses the Basic inputs and LaneCountFromZero immediates of the
// vector dup test; DEFINE_TEST_NEON_2OPIMM_SCALAR is only partly visible in
// this chunk.
| 2327 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) | |
| 2328 | |
| 2329 // Advanced SIMD three same. | |
// One line per instruction; every test uses the Basic input set.
// NOTE(review): the DEFINE_TEST_NEON_3SAME* macros are defined outside this
// chunk. The suffixes (_NO2D, _HS, _8B_16B, _FP) presumably restrict which
// arrangements are instantiated - confirm against the macro definitions.
| 2330 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic) | |
| 2331 DEFINE_TEST_NEON_3SAME(sqadd, Basic) | |
| 2332 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic) | |
| 2333 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic) | |
| 2334 DEFINE_TEST_NEON_3SAME(sqsub, Basic) | |
| 2335 DEFINE_TEST_NEON_3SAME(cmgt, Basic) | |
| 2336 DEFINE_TEST_NEON_3SAME(cmge, Basic) | |
| 2337 DEFINE_TEST_NEON_3SAME(sshl, Basic) | |
| 2338 DEFINE_TEST_NEON_3SAME(sqshl, Basic) | |
| 2339 DEFINE_TEST_NEON_3SAME(srshl, Basic) | |
| 2340 DEFINE_TEST_NEON_3SAME(sqrshl, Basic) | |
| 2341 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic) | |
| 2342 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic) | |
| 2343 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic) | |
| 2344 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic) | |
| 2345 DEFINE_TEST_NEON_3SAME(add, Basic) | |
| 2346 DEFINE_TEST_NEON_3SAME(cmtst, Basic) | |
| 2347 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic) | |
| 2348 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic) | |
| 2349 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic) | |
| 2350 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic) | |
| 2351 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic) | |
| 2352 DEFINE_TEST_NEON_3SAME(addp, Basic) | |
| 2353 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic) | |
| 2354 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic) | |
| 2355 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic) | |
| 2356 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic) | |
| 2357 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic) | |
| 2358 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic) | |
| 2359 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic) | |
| 2360 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic) | |
| 2361 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic) | |
| 2362 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic) | |
| 2363 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic) | |
| 2364 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic) | |
| 2365 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic) | |
| 2366 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic) | |
| 2367 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic) | |
| 2368 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic) | |
| 2369 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic) | |
| 2370 DEFINE_TEST_NEON_3SAME(uqadd, Basic) | |
| 2371 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic) | |
| 2372 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic) | |
| 2373 DEFINE_TEST_NEON_3SAME(uqsub, Basic) | |
| 2374 DEFINE_TEST_NEON_3SAME(cmhi, Basic) | |
| 2375 DEFINE_TEST_NEON_3SAME(cmhs, Basic) | |
| 2376 DEFINE_TEST_NEON_3SAME(ushl, Basic) | |
| 2377 DEFINE_TEST_NEON_3SAME(uqshl, Basic) | |
| 2378 DEFINE_TEST_NEON_3SAME(urshl, Basic) | |
| 2379 DEFINE_TEST_NEON_3SAME(uqrshl, Basic) | |
| 2380 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic) | |
| 2381 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic) | |
| 2382 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic) | |
| 2383 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic) | |
| 2384 DEFINE_TEST_NEON_3SAME(sub, Basic) | |
| 2385 DEFINE_TEST_NEON_3SAME(cmeq, Basic) | |
| 2386 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic) | |
| 2387 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic) | |
| 2388 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic) | |
| 2389 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic) | |
| 2390 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic) | |
| 2391 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic) | |
| 2392 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic) | |
| 2393 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic) | |
| 2394 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic) | |
| 2395 DEFINE_TEST_NEON_3SAME_FP(facge, Basic) | |
| 2396 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic) | |
| 2397 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic) | |
| 2398 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic) | |
| 2399 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic) | |
| 2400 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic) | |
| 2401 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic) | |
| 2402 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic) | |
| 2403 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic) | |
| 2404 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic) | |
| 2405 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic) | |
| 2406 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic) | |
| 2407 | |
| 2408 // Advanced SIMD scalar three same. | |
// NOTE(review): uqadd and uqsub are instantiated with the _SCALAR_D macro
// while their signed counterparts sqadd and sqsub use _SCALAR. If _SCALAR_D
// covers only the D form (the macro is defined outside this chunk), the B, H
// and S scalar forms of uqadd/uqsub are left untested - confirm whether that
// is intentional.
| 2409 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic) | |
| 2410 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic) | |
| 2411 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic) | |
| 2412 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic) | |
| 2413 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic) | |
| 2414 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic) | |
| 2415 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic) | |
| 2416 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic) | |
| 2417 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic) | |
| 2418 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic) | |
| 2419 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic) | |
| 2420 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic) | |
| 2421 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic) | |
| 2422 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic) | |
| 2423 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic) | |
| 2424 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic) | |
| 2425 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic) | |
| 2426 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic) | |
| 2427 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic) | |
| 2428 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic) | |
| 2429 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic) | |
| 2430 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic) | |
| 2431 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic) | |
| 2432 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic) | |
| 2433 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic) | |
| 2434 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic) | |
| 2435 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic) | |
| 2436 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic) | |
| 2437 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic) | |
| 2438 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic) | |
| 2439 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic) | |
| 2440 | |
| 2441 // Advanced SIMD three different. | |
// NOTE(review): the DEFINE_TEST_NEON_3DIFF_* macros are defined outside this
// chunk. The _LONG/_WIDE/_NARROW names presumably describe how destination
// and source lane widths differ, and _SD/_8H which arrangements are
// generated - confirm against the macro definitions.
| 2442 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic) | |
| 2443 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic) | |
| 2444 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic) | |
| 2445 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic) | |
| 2446 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic) | |
| 2447 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic) | |
| 2448 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic) | |
| 2449 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic) | |
| 2450 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic) | |
| 2451 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic) | |
| 2452 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic) | |
| 2453 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic) | |
| 2454 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic) | |
| 2455 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic) | |
| 2456 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic) | |
| 2457 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic) | |
| 2458 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic) | |
| 2459 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic) | |
| 2460 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic) | |
| 2461 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic) | |
| 2462 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic) | |
| 2463 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic) | |
| 2464 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic) | |
| 2465 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic) | |
| 2466 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic) | |
| 2467 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic) | |
| 2468 | |
| 2469 // Advanced SIMD scalar three different. | |
// The same three saturating-doubling multiplies that use the _LONG_SD macro in
// the vector "three different" group; the _SCALAR_LONG_SD macro is defined
// outside this chunk.
| 2470 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic) | |
| 2471 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic) | |
| 2472 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic) | |
| 2473 | |
| 2474 // Advanced SIMD scalar pairwise. | |
// addp is written out directly instead of through a DEFINE_ macro: only the
// (D, 2D) form is tested, with the 64-bit Basic inputs.
| 2475 SIMTEST(addp_SCALAR) { | |
| 2476 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic); | |
| 2477 } | |
// The floating-point pairwise reductions share one macro (defined outside this
// chunk).
| 2478 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic) | |
| 2479 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic) | |
| 2480 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic) | |
| 2481 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic) | |
| 2482 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic) | |
| 2483 | |
| 2484 // Advanced SIMD shift by immediate. | |
// The third argument names the immediate table set (TypeWidth,
// TypeWidthFromZero or TypeWidthFromZeroToWidth). In
// DEFINE_TEST_NEON_2OPIMM_LONG it is pasted onto the kInputNNbitsImm prefix;
// the other 2OPIMM macros are defined outside this chunk and presumably do the
// same.
// NOTE(review): the tables themselves are not visible here; judging by the
// names they differ in the range of shift amounts - confirm in the inputs
// header.
| 2485 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth) | |
| 2486 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth) | |
| 2487 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth) | |
| 2488 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth) | |
| 2489 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero) | |
| 2490 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero) | |
| 2491 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth) | |
| 2492 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth) | |
| 2493 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth) | |
| 2494 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth) | |
| 2495 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero) | |
| 2496 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, | |
| 2497 TypeWidthFromZeroToWidth) | |
| 2498 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth) | |
| 2499 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth) | |
| 2500 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth) | |
| 2501 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth) | |
| 2502 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth) | |
| 2503 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth) | |
| 2504 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero) | |
| 2505 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero) | |
| 2506 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero) | |
| 2507 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth) | |
| 2508 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth) | |
| 2509 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth) | |
| 2510 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth) | |
| 2511 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero) | |
| 2512 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, | |
| 2513 TypeWidthFromZeroToWidth) | |
| 2514 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth) | |
| 2515 | |
| 2516 // Advanced SIMD scalar shift by immediate. | |
// Scalar counterparts of the vector shift-by-immediate tests, using the same
// input and immediate set names. The _SCALAR_D, _SCALAR_NARROW and _FP_SCALAR
// macros are defined outside this chunk.
| 2517 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth) | |
| 2518 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth) | |
| 2519 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth) | |
| 2520 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth) | |
| 2521 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero) | |
| 2522 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero) | |
| 2523 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth) | |
| 2524 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth) | |
| 2525 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, | |
| 2526 TypeWidthFromZeroToWidth) | |
| 2527 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth) | |
| 2528 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth) | |
| 2529 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth) | |
| 2530 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth) | |
| 2531 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth) | |
| 2532 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth) | |
| 2533 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero) | |
| 2534 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero) | |
| 2535 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero) | |
| 2536 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth) | |
| 2537 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth) | |
| 2538 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth) | |
| 2539 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth) | |
| 2540 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, | |
| 2541 TypeWidthFromZeroToWidth) | |
| 2542 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth) | |
| 2543 | |
| 2544 // Advanced SIMD two-register miscellaneous. | |
// The compare-against-zero forms (cmgt, cmeq, cmlt, cmge, cmle and their
// fcm* counterparts) are generated by the 2OPIMM macros with the Zero
// immediate set. shll uses its own immediate set, SHLL. Conversion and
// rounding instructions use the Conversions inputs instead of Basic.
| 2545 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic) | |
| 2546 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic) | |
| 2547 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic) | |
| 2548 DEFINE_TEST_NEON_2SAME(suqadd, Basic) | |
| 2549 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic) | |
| 2550 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic) | |
| 2551 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic) | |
| 2552 DEFINE_TEST_NEON_2SAME(sqabs, Basic) | |
| 2553 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero) | |
| 2554 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero) | |
| 2555 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero) | |
| 2556 DEFINE_TEST_NEON_2SAME(abs, Basic) | |
| 2557 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic) | |
| 2558 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic) | |
| 2559 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions) | |
| 2560 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions) | |
| 2561 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions) | |
| 2562 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions) | |
| 2563 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions) | |
| 2564 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions) | |
| 2565 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions) | |
| 2566 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. | |
| 2567 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero) | |
| 2568 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero) | |
| 2569 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero) | |
| 2570 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic) | |
| 2571 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions) | |
| 2572 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions) | |
| 2573 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions) | |
| 2574 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. | |
| 2575 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic) | |
| 2576 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic) | |
| 2577 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic) | |
| 2578 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic) | |
| 2579 DEFINE_TEST_NEON_2SAME(usqadd, Basic) | |
| 2580 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic) | |
| 2581 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic) | |
| 2582 DEFINE_TEST_NEON_2SAME(sqneg, Basic) | |
| 2583 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero) | |
| 2584 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero) | |
| 2585 DEFINE_TEST_NEON_2SAME(neg, Basic) | |
| 2586 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic) | |
| 2587 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL) | |
| 2588 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic) | |
| 2589 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions) | |
| 2590 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions) | |
| 2591 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions) | |
| 2592 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions) | |
| 2593 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions) | |
| 2594 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions) | |
| 2595 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. | |
| 2596 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic) | |
| 2597 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic) | |
| 2598 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero) | |
| 2599 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero) | |
| 2600 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic) | |
| 2601 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions) | |
| 2602 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions) | |
| 2603 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. | |
| 2604 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic) | |
| 2605 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic) | |
| 2606 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic) | |
| 2607 | |
| 2608 // Advanced SIMD scalar two-register miscellaneous. | |
// Scalar counterparts of the two-register miscellaneous tests. The "covered
// by" notes below repeat the wording used in the vector group.
| 2609 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic) | |
| 2610 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic) | |
| 2611 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero) | |
| 2612 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero) | |
| 2613 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero) | |
| 2614 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic) | |
| 2615 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic) | |
| 2616 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions) | |
| 2617 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions) | |
| 2618 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions) | |
| 2619 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. | |
| 2620 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero) | |
| 2621 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero) | |
| 2622 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero) | |
| 2623 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions) | |
| 2624 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. | |
| 2625 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic) | |
| 2626 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic) | |
| 2627 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic) | |
| 2628 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic) | |
| 2629 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero) | |
| 2630 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero) | |
| 2631 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic) | |
| 2632 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic) | |
| 2633 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic) | |
// fcvtxn is written out directly instead of through a DEFINE_ macro: only the
// (S, D) form is tested, with the double-precision Conversions inputs.
| 2634 SIMTEST(fcvtxn_SCALAR) { | |
| 2635 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions); | |
| 2636 } | |
| 2637 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions) | |
| 2638 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions) | |
| 2639 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions) | |
| 2640 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. | |
| 2641 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero) | |
| 2642 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero) | |
| 2643 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions) | |
| 2644 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. | |
| 2645 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic) | |
| 2646 | |
| 2647 // Advanced SIMD across lanes. | |
// Three generators are used: _ACROSS_LONG for saddlv/uaddlv, _ACROSS for the
// other integer reductions, and _ACROSS_FP for the floating-point ones. All
// three macros are defined outside this chunk.
| 2648 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic) | |
| 2649 DEFINE_TEST_NEON_ACROSS(smaxv, Basic) | |
| 2650 DEFINE_TEST_NEON_ACROSS(sminv, Basic) | |
| 2651 DEFINE_TEST_NEON_ACROSS(addv, Basic) | |
| 2652 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic) | |
| 2653 DEFINE_TEST_NEON_ACROSS(umaxv, Basic) | |
| 2654 DEFINE_TEST_NEON_ACROSS(uminv, Basic) | |
| 2655 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic) | |
| 2656 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic) | |
| 2657 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic) | |
| 2658 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic) | |
| 2659 | |
| 2660 // Advanced SIMD permute. | |
// The permute instructions reuse DEFINE_TEST_NEON_3SAME, the same generator
// used by the "three same" group, rather than a dedicated macro.
| 2661 DEFINE_TEST_NEON_3SAME(uzp1, Basic) | |
| 2662 DEFINE_TEST_NEON_3SAME(trn1, Basic) | |
| 2663 DEFINE_TEST_NEON_3SAME(zip1, Basic) | |
| 2664 DEFINE_TEST_NEON_3SAME(uzp2, Basic) | |
| 2665 DEFINE_TEST_NEON_3SAME(trn2, Basic) | |
| 2666 DEFINE_TEST_NEON_3SAME(zip2, Basic) | |
| 2667 | |
| 2668 // Advanced SIMD vector x indexed element. | |
// The three trailing arguments select the input sets for the d, n and m
// operands; every test here uses Basic for all three.
//   _BYELEMENT      - d and n share an arrangement (H and S lanes).
//   _BYELEMENT_DIFF - d is twice as wide as n/m; also generates the
//                     mnemonic##2 tests.
//   _FP_BYELEMENT   - float and double inputs (S and D lanes).
| 2669 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic) | |
| 2670 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic) | |
| 2671 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic) | |
| 2672 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic) | |
| 2673 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic) | |
| 2674 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic) | |
| 2675 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic) | |
| 2676 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic) | |
| 2677 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic) | |
| 2678 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic) | |
| 2679 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic) | |
| 2680 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic) | |
| 2681 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic) | |
| 2682 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic) | |
| 2683 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic) | |
| 2684 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic) | |
| 2685 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic) | |
| 2686 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic) | |
| 2687 | |
| 2688 // Advanced SIMD scalar x indexed element. | |
// Scalar counterparts of the by-element tests, using the _SCALAR variants of
// the by-element macros. All operands use the Basic input set.
| 2689 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic) | |
| 2690 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic) | |
| 2691 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic) | |
| 2692 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic) | |
| 2693 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic) | |
| 2694 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic) | |
| 2695 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic) | |
| 2696 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic) | |
| 2697 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic) | |
| OLD | NEW |