Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: test/cctest/test-simulator-arm64.cc

Issue 2622643005: ARM64: Add NEON support (Closed)
Patch Set: Restore AreConsecutive change Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <cmath>
9 #include <limits>
10
11 #include "src/arm64/decoder-arm64-inl.h"
12 #include "src/arm64/disasm-arm64.h"
13 #include "src/arm64/simulator-arm64.h"
14 #include "src/arm64/utils-arm64.h"
15 #include "src/base/platform/platform.h"
16 #include "src/base/utils/random-number-generator.h"
17 #include "src/macro-assembler.h"
18 #include "test/cctest/cctest.h"
19 #include "test/cctest/test-simulator-inputs-arm64.h"
20 #include "test/cctest/test-simulator-traces-arm64.h"
21 #include "test/cctest/test-utils-arm64.h"
22
23 using namespace v8::internal;
24
25 // Test infrastructure.
26 //
27 // Tests are functions which accept no parameters and have no return values.
28 // The testing code should not perform an explicit return once completed. For
29 // example to test the mov immediate instruction a very simple test would be:
30 //
31 // SIMTEST(mov_x0_one) {
32 // SETUP();
33 //
34 // START();
35 // __ mov(x0, Operand(1));
36 // END();
37 //
38 // RUN();
39 //
40 // CHECK_EQUAL_64(1, x0);
41 //
42 // TEARDOWN();
43 // }
44 //
45 // Within a START ... END block all registers but sp can be modified. sp has to
46 // be explicitly saved/restored. The END() macro replaces the function return
47 // so it may appear multiple times in a test if the test has multiple exit
48 // points.
49 //
50 // Once the test has been run all integer and floating point registers as well
51 // as flags are accessible through a RegisterDump instance, see
52 // test-utils-arm64.h for more info on RegisterDump.
53 //
54 // We provide some helper asserts to handle common cases:
55 //
56 // CHECK_EQUAL_32(int32_t, int32_t)
57 // CHECK_EQUAL_FP32(float, float)
58 // CHECK_EQUAL_32(int32_t, W register)
59 // CHECK_EQUAL_FP32(float, S register)
60 // CHECK_EQUAL_64(int64_t, int64_t)
61 // CHECK_EQUAL_FP64(double, double)
62 // CHECK_EQUAL_64(int64_t, X register)
63 // CHECK_EQUAL_64(X register, X register)
64 // CHECK_EQUAL_FP64(double, D register)
65 //
66 // e.g. CHECK_EQUAL_FP64(0.5, d30);
67 //
68 // If more advanced computation is required before the assert, then access
69 // the RegisterDump named core directly:
70 //
71 // CHECK_EQUAL_64(0x1234, core.xreg(0) & 0xffff);
72
73 #if 0 // TODO(all): enable.
74 static v8::Persistent<v8::Context> env;
75
76 static void InitializeVM() {
77 if (env.IsEmpty()) {
78 env = v8::Context::New();
79 }
80 }
81 #endif
82
83 #define __ masm.
84 #define SIMTEST(name) TEST(SIM_##name)
85
86 #define BUF_SIZE 8192
87 #define SETUP() SETUP_SIZE(BUF_SIZE)
88
89 #define INIT_V8() CcTest::InitializeVM();
90
91 #ifdef USE_SIMULATOR
92
93 // Run tests with the simulator.
94 #define SETUP_SIZE(buf_size) \
95 Isolate* isolate = CcTest::i_isolate(); \
96 HandleScope scope(isolate); \
97 CHECK(isolate != NULL); \
98 byte* buf = new byte[buf_size]; \
99 MacroAssembler masm(isolate, buf, buf_size, \
100 v8::internal::CodeObjectRequired::kYes); \
101 Decoder<DispatchingDecoderVisitor>* decoder = \
102 new Decoder<DispatchingDecoderVisitor>(); \
103 Simulator simulator(decoder); \
104 RegisterDump core;
105
106 // Reset the assembler and simulator, so that instructions can be generated,
107 // but don't actually emit any code. This can be used by tests that need to
108 // emit instructions at the start of the buffer. Note that START_AFTER_RESET
109 // must be called before any callee-saved register is modified, and before an
110 // END is encountered.
111 //
112 // Most tests should call START, rather than call RESET directly.
113 #define RESET() \
114 __ Reset(); \
115 simulator.ResetState();
116
117 #define START_AFTER_RESET() \
118 __ SetStackPointer(csp); \
119 __ PushCalleeSavedRegisters(); \
120 __ Debug("Start test.", __LINE__, TRACE_ENABLE | LOG_ALL);
121
122 #define START() \
123 RESET(); \
124 START_AFTER_RESET();
125
126 #define RUN() simulator.RunFrom(reinterpret_cast<Instruction*>(buf))
127
128 #define END() \
129 __ Debug("End test.", __LINE__, TRACE_DISABLE | LOG_ALL); \
130 core.Dump(&masm); \
131 __ PopCalleeSavedRegisters(); \
132 __ Ret(); \
133 __ GetCode(NULL);
134
135 #define TEARDOWN() delete[] buf;
136
137 #else // ifdef USE_SIMULATOR.
138 // Run the test on real hardware or models.
139 #define SETUP_SIZE(buf_size) \
140 Isolate* isolate = CcTest::i_isolate(); \
141 HandleScope scope(isolate); \
142 CHECK(isolate != NULL); \
143 size_t actual_size; \
144 byte* buf = static_cast<byte*>( \
145 v8::base::OS::Allocate(buf_size, &actual_size, true)); \
146 MacroAssembler masm(isolate, buf, actual_size, \
147 v8::internal::CodeObjectRequired::kYes); \
148 RegisterDump core;
149
150 #define RESET() \
151 __ Reset(); \
152 /* Reset the machine state (like simulator.ResetState()). */ \
153 __ Msr(NZCV, xzr); \
154 __ Msr(FPCR, xzr);
155
156 #define START_AFTER_RESET() \
157 __ SetStackPointer(csp); \
158 __ PushCalleeSavedRegisters();
159
160 #define START() \
161 RESET(); \
162 START_AFTER_RESET();
163
164 #define RUN() \
165 Assembler::FlushICache(isolate, buf, masm.SizeOfGeneratedCode()); \
166 { \
167 void (*test_function)(void); \
168 memcpy(&test_function, &buf, sizeof(buf)); \
169 test_function(); \
170 }
171
172 #define END() \
173 core.Dump(&masm); \
174 __ PopCalleeSavedRegisters(); \
175 __ Ret(); \
176 __ GetCode(NULL);
177
178 #define TEARDOWN() v8::base::OS::Free(buf, actual_size);
179
180 #endif // ifdef USE_SIMULATOR.
181
182 #define CHECK_EQUAL_NZCV(expected) CHECK(EqualNzcv(expected, core.flags_nzcv()))
183
184 #define CHECK_EQUAL_REGISTERS(expected) CHECK(EqualRegisters(&expected, &core))
185
186 #define CHECK_EQUAL_32(expected, result) \
187 CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))
188
189 #define CHECK_EQUAL_FP32(expected, result) \
190 CHECK(EqualFP32(expected, &core, result))
191
192 #define CHECK_EQUAL_64(expected, result) CHECK(Equal64(expected, &core, result))
193
194 #define CHECK_EQUAL_FP64(expected, result) \
195 CHECK(EqualFP64(expected, &core, result))
196
197 #ifdef DEBUG
198 #define CHECK_LITERAL_POOL_SIZE(expected) \
199 CHECK((expected) == (__ LiteralPoolSize()))
200 #else
201 #define CHECK_LITERAL_POOL_SIZE(expected) ((void)0)
202 #endif
203
204 // The maximum number of errors to report in detail for each test.
205 static const unsigned kErrorReportLimit = 8;
206
207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
208 const VRegister& vn);
209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
210 const VRegister& vn,
211 const VRegister& vm);
212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
213 const VRegister& vn,
214 const VRegister& vm,
215 int vm_index);
216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
217 const VRegister& vd, int imm1, const VRegister& vn, int imm2);
218
219 // This helps using the same typename for both the function pointer
220 // and the array of immediates passed to helper routines.
221 template <typename T>
222 class Test2OpImmediateNEONHelper_t {
223 public:
224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
225 const VRegister& vn, T imm);
226 };
227
228 namespace {
229
// Returns how many hexadecimal digits are needed to print a value of the
// wider of the two template types (two digits per byte).
template <typename Ta, typename Tb>
unsigned MaxHexCharCount() {
  const unsigned widest_bytes = static_cast<unsigned>(
      (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta));
  const unsigned widest_bits = widest_bytes * 8;
  // Each hex digit encodes four bits.
  return widest_bits / 4;
}
237
238 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
239
240 void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n,
241 unsigned inputs_n_length, uintptr_t results,
242 VectorFormat vd_form, VectorFormat vn_form) {
243 DCHECK_NE(vd_form, kFormatUndefined);
244 DCHECK_NE(vn_form, kFormatUndefined);
245
246 SETUP();
247 START();
248
249 // Roll up the loop to keep the code size down.
250 Label loop_n;
251
252 Register out = x0;
253 Register inputs_n_base = x1;
254 Register inputs_n_last_16bytes = x3;
255 Register index_n = x5;
256
257 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
258 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
259
260 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
261 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
262 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
263 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
264 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
265
266 // These will be either a D- or a Q-register form, with a single lane
267 // (for use in scalar load and store operations).
268 VRegister vd = VRegister::Create(0, vd_bits);
269 VRegister vn = v1.V16B();
270 VRegister vntmp = v3.V16B();
271
272 // These will have the correct format for use when calling 'helper'.
273 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count);
274 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
275
276 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
277 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
278
279 __ Mov(out, results);
280
281 __ Mov(inputs_n_base, inputs_n);
282 __ Mov(inputs_n_last_16bytes,
283 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
284
285 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
286
287 __ Mov(index_n, 0);
288 __ Bind(&loop_n);
289
290 __ Ldr(vntmp_single,
291 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
292 __ Ext(vn, vn, vntmp, vn_lane_bytes);
293
294 // Set the destination to zero.
295
296 // TODO(all): Setting the destination to values other than zero might be a
297 // better test for instructions such as sqxtn2 which may leave parts of V
298 // registers unchanged.
299 __ Movi(vd.V16B(), 0);
300
301 (masm.*helper)(vd_helper, vn_helper);
302
303 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
304
305 __ Add(index_n, index_n, 1);
306 __ Cmp(index_n, inputs_n_length);
307 __ B(lo, &loop_n);
308
309 END();
310 RUN();
311 TEARDOWN();
312 }
313
314 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
315 // arrays of rawbit representation of input values. This ensures that
316 // exact bit comparisons can be performed.
317 template <typename Td, typename Tn>
318 void Test1OpNEON(const char* name, Test1OpNEONHelper_t helper,
319 const Tn inputs_n[], unsigned inputs_n_length,
320 const Td expected[], unsigned expected_length,
321 VectorFormat vd_form, VectorFormat vn_form) {
322 DCHECK_GT(inputs_n_length, 0U);
323
324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
325 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
326 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
327
328 const unsigned results_length = inputs_n_length;
329 std::vector<Td> results(results_length * vd_lane_count, 0);
330 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
331
332 Test1OpNEON_Helper(
333 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
334 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form);
335
336 // Check the results.
337 CHECK(expected_length == results_length);
338 unsigned error_count = 0;
339 unsigned d = 0;
340 const char* padding = " ";
341 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1));
342 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
343 bool error_in_vector = false;
344
345 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
346 unsigned output_index = (n * vd_lane_count) + lane;
347
348 if (results[output_index] != expected[output_index]) {
349 error_in_vector = true;
350 break;
351 }
352 }
353
354 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
355 printf("%s\n", name);
356 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding,
357 lane_len_in_hex + 1, padding);
358
359 const unsigned first_index_n =
360 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
361
362 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
363 lane++) {
364 unsigned output_index = (n * vd_lane_count) + lane;
365 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
366
367 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64
368 " "
369 "| 0x%0*" PRIx64 "\n",
370 results[output_index] != expected[output_index] ? '*' : ' ',
371 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
372 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
373 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));
374 }
375 }
376 }
377 DCHECK_EQ(d, expected_length);
378 if (error_count > kErrorReportLimit) {
379 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
380 }
381 DCHECK_EQ(error_count, 0U);
382 }
383
384 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
385 // where <V> is one of B, H, S or D registers.
386 // e.g. saddlv H1, v0.8B
387
388 // TODO(all): Change tests to store all lanes of the resulting V register.
389 // Some tests store all 128 bits of the resulting V register to
390 // check the simulator's behaviour on the rest of the register.
391 // This is better than storing the affected lanes only.
392 // Change any tests such as the 'Across' template to do the same.
393
394 void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n,
395 unsigned inputs_n_length, uintptr_t results,
396 VectorFormat vd_form, VectorFormat vn_form) {
397 DCHECK_NE(vd_form, kFormatUndefined);
398 DCHECK_NE(vn_form, kFormatUndefined);
399
400 SETUP();
401 START();
402
403 // Roll up the loop to keep the code size down.
404 Label loop_n;
405
406 Register out = x0;
407 Register inputs_n_base = x1;
408 Register inputs_n_last_vector = x3;
409 Register index_n = x5;
410
411 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
412 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
413 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
414 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
415 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
416 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
417
418 // Test destructive operations by (arbitrarily) using the same register for
419 // B and S lane sizes.
420 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
421
422 // These will be either a D- or a Q-register form, with a single lane
423 // (for use in scalar load and store operations).
424 // Create two aliases for v8; the first is the destination for the tested
425 // instruction, the second, the whole Q register to check the results.
426 VRegister vd = VRegister::Create(0, vd_bits);
427 VRegister vdstr = VRegister::Create(0, kQRegSizeInBits);
428
429 VRegister vn = VRegister::Create(1, vn_bits);
430 VRegister vntmp = VRegister::Create(3, vn_bits);
431
432 // These will have the correct format for use when calling 'helper'.
433 VRegister vd_helper = VRegister::Create(0, vn_bits, vn_lane_count);
434 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
435
436 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
437 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
438
439 // Same registers for use in the 'ext' instructions.
440 VRegister vn_ext = (kDRegSizeInBits == vn_bits) ? vn.V8B() : vn.V16B();
441 VRegister vntmp_ext =
442 (kDRegSizeInBits == vn_bits) ? vntmp.V8B() : vntmp.V16B();
443
444 __ Mov(out, results);
445
446 __ Mov(inputs_n_base, inputs_n);
447 __ Mov(inputs_n_last_vector,
448 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
449
450 __ Ldr(vn, MemOperand(inputs_n_last_vector));
451
452 __ Mov(index_n, 0);
453 __ Bind(&loop_n);
454
455 __ Ldr(vntmp_single,
456 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
457 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
458
459 if (destructive) {
460 __ Mov(vd_helper, vn_helper);
461 (masm.*helper)(vd, vd_helper);
462 } else {
463 (masm.*helper)(vd, vn_helper);
464 }
465
466 __ Str(vdstr, MemOperand(out, kQRegSize, PostIndex));
467
468 __ Add(index_n, index_n, 1);
469 __ Cmp(index_n, inputs_n_length);
470 __ B(lo, &loop_n);
471
472 END();
473 RUN();
474 TEARDOWN();
475 }
476
477 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
478 // arrays of rawbit representation of input values. This ensures that
479 // exact bit comparisons can be performed.
480 template <typename Td, typename Tn>
481 void Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper,
482 const Tn inputs_n[], unsigned inputs_n_length,
483 const Td expected[], unsigned expected_length,
484 VectorFormat vd_form, VectorFormat vn_form) {
485 DCHECK_GT(inputs_n_length, 0U);
486
487 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
488 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
489
490 const unsigned results_length = inputs_n_length;
491 std::vector<Td> results(results_length * vd_lanes_per_q, 0);
492 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
493
494 Test1OpAcrossNEON_Helper(
495 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
496 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form);
497
498 // Check the results.
499 DCHECK_EQ(expected_length, results_length);
500 unsigned error_count = 0;
501 unsigned d = 0;
502 const char* padding = " ";
503 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1));
504 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
505 bool error_in_vector = false;
506
507 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
508 unsigned expected_index = (n * vd_lane_count) + lane;
509 unsigned results_index = (n * vd_lanes_per_q) + lane;
510
511 if (results[results_index] != expected[expected_index]) {
512 error_in_vector = true;
513 break;
514 }
515
516 // For across operations, the remaining lanes should be zero.
517 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
518 unsigned results_index = (n * vd_lanes_per_q) + lane;
519 if (results[results_index] != 0) {
520 error_in_vector = true;
521 break;
522 }
523 }
524 }
525
526 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
527 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
528
529 printf("%s\n", name);
530 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding,
531 lane_len_in_hex + 1, padding);
532
533 for (unsigned lane = 0; lane < vn_lane_count; lane++) {
534 unsigned results_index =
535 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
536 unsigned input_index_n =
537 (inputs_n_length - vn_lane_count + n + 1 + lane) % inputs_n_length;
538
539 Td expect = 0;
540 if ((vn_lane_count - 1) == lane) {
541 // This is the last lane to be printed, ie. the least-significant
542 // lane, so use the expected value; any other lane should be zero.
543 unsigned expected_index = n * vd_lane_count;
544 expect = expected[expected_index];
545 }
546 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
547 results[results_index] != expect ? '*' : ' ', lane_len_in_hex,
548 static_cast<uint64_t>(inputs_n[input_index_n]), lane_len_in_hex,
549 static_cast<uint64_t>(results[results_index]), lane_len_in_hex,
550 static_cast<uint64_t>(expect));
551 }
552 }
553 }
554 DCHECK_EQ(d, expected_length);
555 if (error_count > kErrorReportLimit) {
556 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
557 }
558 DCHECK_EQ(error_count, 0U);
559 }
560
561 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
562
563 void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d,
564 uintptr_t inputs_n, unsigned inputs_n_length,
565 uintptr_t inputs_m, unsigned inputs_m_length,
566 uintptr_t results, VectorFormat vd_form,
567 VectorFormat vn_form, VectorFormat vm_form) {
568 DCHECK_NE(vd_form, kFormatUndefined);
569 DCHECK_NE(vn_form, kFormatUndefined);
570 DCHECK_NE(vm_form, kFormatUndefined);
571
572 SETUP();
573 START();
574
575 // Roll up the loop to keep the code size down.
576 Label loop_n, loop_m;
577
578 Register out = x0;
579 Register inputs_n_base = x1;
580 Register inputs_m_base = x2;
581 Register inputs_d_base = x3;
582 Register inputs_n_last_16bytes = x4;
583 Register inputs_m_last_16bytes = x5;
584 Register index_n = x6;
585 Register index_m = x7;
586
587 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
588 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
589
590 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
591 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
592 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
593 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
594 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
595
596 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
597 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
598 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
599 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
600 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
601
602 // Always load and store 128 bits regardless of the format.
603 VRegister vd = v0.V16B();
604 VRegister vn = v1.V16B();
605 VRegister vm = v2.V16B();
606 VRegister vntmp = v3.V16B();
607 VRegister vmtmp = v4.V16B();
608 VRegister vres = v5.V16B();
609
610 // These will have the correct format for calling the 'helper'.
611 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
612 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count);
613 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);
614
615 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
616 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
617 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits);
618
619 __ Mov(out, results);
620
621 __ Mov(inputs_d_base, inputs_d);
622
623 __ Mov(inputs_n_base, inputs_n);
624 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
625 __ Mov(inputs_m_base, inputs_m);
626 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
627
628 __ Ldr(vd, MemOperand(inputs_d_base));
629 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
630 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
631
632 __ Mov(index_n, 0);
633 __ Bind(&loop_n);
634
635 __ Ldr(vntmp_single,
636 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
637 __ Ext(vn, vn, vntmp, vn_lane_bytes);
638
639 __ Mov(index_m, 0);
640 __ Bind(&loop_m);
641
642 __ Ldr(vmtmp_single,
643 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
644 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
645
646 __ Mov(vres, vd);
647
648 (masm.*helper)(vres_helper, vn_helper, vm_helper);
649
650 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
651
652 __ Add(index_m, index_m, 1);
653 __ Cmp(index_m, inputs_m_length);
654 __ B(lo, &loop_m);
655
656 __ Add(index_n, index_n, 1);
657 __ Cmp(index_n, inputs_n_length);
658 __ B(lo, &loop_n);
659
660 END();
661 RUN();
662 TEARDOWN();
663 }
664
665 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
666 // arrays of rawbit representation of input values. This ensures that
667 // exact bit comparisons can be performed.
668 template <typename Td, typename Tn, typename Tm>
669 void Test2OpNEON(const char* name, Test2OpNEONHelper_t helper,
670 const Td inputs_d[], const Tn inputs_n[],
671 unsigned inputs_n_length, const Tm inputs_m[],
672 unsigned inputs_m_length, const Td expected[],
673 unsigned expected_length, VectorFormat vd_form,
674 VectorFormat vn_form, VectorFormat vm_form) {
675 DCHECK(inputs_n_length > 0 && inputs_m_length > 0);
676
677 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
678
679 const unsigned results_length = inputs_n_length * inputs_m_length;
680 std::vector<Td> results(results_length * vd_lane_count);
681 const unsigned lane_len_in_hex =
682 static_cast<unsigned>(std::max(sizeof(Td), sizeof(Tm)) * 8) / 4;
683
684 Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d),
685 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
686 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
687 reinterpret_cast<uintptr_t>(results.data()), vd_form,
688 vn_form, vm_form);
689
690 // Check the results.
691 CHECK(expected_length == results_length);
692 unsigned error_count = 0;
693 unsigned d = 0;
694 const char* padding = " ";
695 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);
696 for (unsigned n = 0; n < inputs_n_length; n++) {
697 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
698 bool error_in_vector = false;
699
700 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
701 unsigned output_index =
702 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane;
703
704 if (results[output_index] != expected[output_index]) {
705 error_in_vector = true;
706 break;
707 }
708 }
709
710 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
711 printf("%s\n", name);
712 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
713 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding,
714 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding);
715
716 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
717 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
718 (m * vd_lane_count) + lane;
719 unsigned input_index_n =
720 (inputs_n_length - vd_lane_count + n + 1 + lane) %
721 inputs_n_length;
722 unsigned input_index_m =
723 (inputs_m_length - vd_lane_count + m + 1 + lane) %
724 inputs_m_length;
725
726 printf(
727 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
728 " "
729 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
730 results[output_index] != expected[output_index] ? '*' : ' ',
731 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),
732 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
733 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),
734 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
735 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));
736 }
737 }
738 }
739 }
740 DCHECK_EQ(d, expected_length);
741 if (error_count > kErrorReportLimit) {
742 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
743 }
744 DCHECK_EQ(error_count, 0U);
745 }
746
747 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
748
749 void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
750 uintptr_t inputs_d, uintptr_t inputs_n,
751 unsigned inputs_n_length, uintptr_t inputs_m,
752 unsigned inputs_m_length, const int indices[],
753 unsigned indices_length, uintptr_t results,
754 VectorFormat vd_form, VectorFormat vn_form,
755 VectorFormat vm_form) {
756 DCHECK_NE(vd_form, kFormatUndefined);
757 DCHECK_NE(vn_form, kFormatUndefined);
758 DCHECK_NE(vm_form, kFormatUndefined);
759
760 SETUP();
761 START();
762
763 // Roll up the loop to keep the code size down.
764 Label loop_n, loop_m;
765
766 Register out = x0;
767 Register inputs_n_base = x1;
768 Register inputs_m_base = x2;
769 Register inputs_d_base = x3;
770 Register inputs_n_last_16bytes = x4;
771 Register inputs_m_last_16bytes = x5;
772 Register index_n = x6;
773 Register index_m = x7;
774
775 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
776 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
777
778 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
779 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
780 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
781 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
782 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
783
784 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
785 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
786 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
787 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
788 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
789
790 // Always load and store 128 bits regardless of the format.
791 VRegister vd = v0.V16B();
792 VRegister vn = v1.V16B();
793 VRegister vm = v2.V16B();
794 VRegister vntmp = v3.V16B();
795 VRegister vmtmp = v4.V16B();
796 VRegister vres = v5.V16B();
797
798 // These will have the correct format for calling the 'helper'.
799 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
800 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count);
801 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);
802
803 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
804 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
805 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits);
806
807 __ Mov(out, results);
808
809 __ Mov(inputs_d_base, inputs_d);
810
811 __ Mov(inputs_n_base, inputs_n);
812 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
813 __ Mov(inputs_m_base, inputs_m);
814 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
815
816 __ Ldr(vd, MemOperand(inputs_d_base));
817 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
818 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
819
820 __ Mov(index_n, 0);
821 __ Bind(&loop_n);
822
823 __ Ldr(vntmp_single,
824 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
825 __ Ext(vn, vn, vntmp, vn_lane_bytes);
826
827 __ Mov(index_m, 0);
828 __ Bind(&loop_m);
829
830 __ Ldr(vmtmp_single,
831 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
832 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
833
834 __ Mov(vres, vd);
835 {
836 for (unsigned i = 0; i < indices_length; i++) {
837 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
838 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
839 }
840 }
841
842 __ Add(index_m, index_m, 1);
843 __ Cmp(index_m, inputs_m_length);
844 __ B(lo, &loop_m);
845
846 __ Add(index_n, index_n, 1);
847 __ Cmp(index_n, inputs_n_length);
848 __ B(lo, &loop_n);
849
850 END();
851 RUN();
852 TEARDOWN();
853 }
854
855 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
856 // arrays of rawbit representation of input values. This ensures that
857 // exact bit comparisons can be performed.
858 template <typename Td, typename Tn, typename Tm>
859 void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper,
860 const Td inputs_d[], const Tn inputs_n[],
861 unsigned inputs_n_length, const Tm inputs_m[],
862 unsigned inputs_m_length, const int indices[],
863 unsigned indices_length, const Td expected[],
864 unsigned expected_length, VectorFormat vd_form,
865 VectorFormat vn_form, VectorFormat vm_form) {
866 DCHECK_GT(inputs_n_length, 0U);
867 DCHECK_GT(inputs_m_length, 0U);
868 DCHECK_GT(indices_length, 0U);
869
870 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
871
872 const unsigned results_length =
873 inputs_n_length * inputs_m_length * indices_length;
874 std::vector<Td> results(results_length * vd_lane_count, 0);
875 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
876
877 TestByElementNEON_Helper(
878 helper, reinterpret_cast<uintptr_t>(inputs_d),
879 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
880 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, indices,
881 indices_length, reinterpret_cast<uintptr_t>(results.data()), vd_form,
882 vn_form, vm_form);
883
884 // Check the results.
885 CHECK(expected_length == results_length);
886 unsigned error_count = 0;
887 unsigned d = 0;
888 const char* padding = " ";
889 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);
890 for (unsigned n = 0; n < inputs_n_length; n++) {
891 for (unsigned m = 0; m < inputs_m_length; m++) {
892 for (unsigned index = 0; index < indices_length; index++, d++) {
893 bool error_in_vector = false;
894
895 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
896 unsigned output_index =
897 (n * inputs_m_length * indices_length * vd_lane_count) +
898 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
899 lane;
900
901 if (results[output_index] != expected[output_index]) {
902 error_in_vector = true;
903 break;
904 }
905 }
906
907 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
908 printf("%s\n", name);
909 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
910 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding,
911 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding);
912
913 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
914 unsigned output_index =
915 (n * inputs_m_length * indices_length * vd_lane_count) +
916 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
917 lane;
918 unsigned input_index_n =
919 (inputs_n_length - vd_lane_count + n + 1 + lane) %
920 inputs_n_length;
921 unsigned input_index_m =
922 (inputs_m_length - vd_lane_count + m + 1 + lane) %
923 inputs_m_length;
924
925 printf(
926 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
927 " "
928 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
929 results[output_index] != expected[output_index] ? '*' : ' ',
930 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),
931 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
932 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),
933 indices[index], lane_len_in_hex,
934 static_cast<uint64_t>(results[output_index]), lane_len_in_hex,
935 static_cast<uint64_t>(expected[output_index]));
936 }
937 }
938 }
939 }
940 }
941 DCHECK_EQ(d, expected_length);
942 if (error_count > kErrorReportLimit) {
943 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
944 }
945 CHECK(error_count == 0);
946 }
947
948 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
949
950 template <typename Tm>
951 void Test2OpImmNEON_Helper(
952 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
953 uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[],
954 unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form,
955 VectorFormat vn_form) {
956 DCHECK(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
957
958 SETUP();
959 START();
960
961 // Roll up the loop to keep the code size down.
962 Label loop_n;
963
964 Register out = x0;
965 Register inputs_n_base = x1;
966 Register inputs_n_last_16bytes = x3;
967 Register index_n = x5;
968
969 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
970 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
971
972 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
973 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
974 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
975 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
976 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
977
978 // These will be either a D- or a Q-register form, with a single lane
979 // (for use in scalar load and store operations).
980 VRegister vd = VRegister::Create(0, vd_bits);
981 VRegister vn = v1.V16B();
982 VRegister vntmp = v3.V16B();
983
984 // These will have the correct format for use when calling 'helper'.
985 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count);
986 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
987
988 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
989 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
990
991 __ Mov(out, results);
992
993 __ Mov(inputs_n_base, inputs_n);
994 __ Mov(inputs_n_last_16bytes,
995 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
996
997 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
998
999 __ Mov(index_n, 0);
1000 __ Bind(&loop_n);
1001
1002 __ Ldr(vntmp_single,
1003 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1004 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1005
1006 // Set the destination to zero for tests such as '[r]shrn2'.
1007 // TODO(all): Setting the destination to values other than zero might be a
1008 // better test for shift and accumulate instructions (srsra/ssra/usra/ursra).
1009 __ Movi(vd.V16B(), 0);
1010
1011 {
1012 for (unsigned i = 0; i < inputs_m_length; i++) {
1013 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
1014 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1015 }
1016 }
1017
1018 __ Add(index_n, index_n, 1);
1019 __ Cmp(index_n, inputs_n_length);
1020 __ B(lo, &loop_n);
1021
1022 END();
1023 RUN();
1024 TEARDOWN();
1025 }
1026
1027 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1028 // arrays of rawbit representation of input values. This ensures that
1029 // exact bit comparisons can be performed.
1030 template <typename Td, typename Tn, typename Tm>
1031 void Test2OpImmNEON(const char* name,
1032 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
1033 const Tn inputs_n[], unsigned inputs_n_length,
1034 const Tm inputs_m[], unsigned inputs_m_length,
1035 const Td expected[], unsigned expected_length,
1036 VectorFormat vd_form, VectorFormat vn_form) {
1037 DCHECK(inputs_n_length > 0 && inputs_m_length > 0);
1038
1039 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1040 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1041 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1042
1043 const unsigned results_length = inputs_n_length * inputs_m_length;
1044 std::vector<Td> results(results_length * vd_lane_count, 0);
1045 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1046
1047 Test2OpImmNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n),
1048 inputs_n_length, inputs_m, inputs_m_length,
1049 reinterpret_cast<uintptr_t>(results.data()), vd_form,
1050 vn_form);
1051
1052 // Check the results.
1053 CHECK(expected_length == results_length);
1054 unsigned error_count = 0;
1055 unsigned d = 0;
1056 const char* padding = " ";
1057 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);
1058 for (unsigned n = 0; n < inputs_n_length; n++) {
1059 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1060 bool error_in_vector = false;
1061
1062 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1063 unsigned output_index =
1064 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane;
1065
1066 if (results[output_index] != expected[output_index]) {
1067 error_in_vector = true;
1068 break;
1069 }
1070 }
1071
1072 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1073 printf("%s\n", name);
1074 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1,
1075 padding, lane_len_in_hex, padding, lane_len_in_hex + 1, padding);
1076
1077 const unsigned first_index_n =
1078 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1079
1080 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1081 lane++) {
1082 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1083 (m * vd_lane_count) + lane;
1084 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1085 unsigned input_index_m = m;
1086
1087 printf(
1088 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1089 " "
1090 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1091 results[output_index] != expected[output_index] ? '*' : ' ',
1092 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
1093 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),
1094 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
1095 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));
1096 }
1097 }
1098 }
1099 }
1100 DCHECK_EQ(d, expected_length);
1101 if (error_count > kErrorReportLimit) {
1102 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1103 }
1104 CHECK(error_count == 0);
1105 }
1106
1107 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
1108
1109 void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
1110 uintptr_t inputs_d, const int inputs_imm1[],
1111 unsigned inputs_imm1_length, uintptr_t inputs_n,
1112 unsigned inputs_n_length,
1113 const int inputs_imm2[],
1114 unsigned inputs_imm2_length, uintptr_t results,
1115 VectorFormat vd_form, VectorFormat vn_form) {
1116 DCHECK_NE(vd_form, kFormatUndefined);
1117 DCHECK_NE(vn_form, kFormatUndefined);
1118
1119 SETUP();
1120 START();
1121
1122 // Roll up the loop to keep the code size down.
1123 Label loop_n;
1124
1125 Register out = x0;
1126 Register inputs_d_base = x1;
1127 Register inputs_n_base = x2;
1128 Register inputs_n_last_vector = x4;
1129 Register index_n = x6;
1130
1131 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1132 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1133
1134 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1135 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1136 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1137 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1138 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1139
1140 // These will be either a D- or a Q-register form, with a single lane
1141 // (for use in scalar load and store operations).
1142 VRegister vd = VRegister::Create(0, vd_bits);
1143 VRegister vn = VRegister::Create(1, vn_bits);
1144 VRegister vntmp = VRegister::Create(4, vn_bits);
1145 VRegister vres = VRegister::Create(5, vn_bits);
1146
1147 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
1148 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);
1149
1150 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1151 VRegister vntmp_single = VRegister::Create(4, vn_lane_bits);
1152
1153 // Same registers for use in the 'ext' instructions.
1154 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1155 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1156
1157 __ Mov(out, results);
1158
1159 __ Mov(inputs_d_base, inputs_d);
1160
1161 __ Mov(inputs_n_base, inputs_n);
1162 __ Mov(inputs_n_last_vector,
1163 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1164
1165 __ Ldr(vd, MemOperand(inputs_d_base));
1166
1167 __ Ldr(vn, MemOperand(inputs_n_last_vector));
1168
1169 __ Mov(index_n, 0);
1170 __ Bind(&loop_n);
1171
1172 __ Ldr(vntmp_single,
1173 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1174 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1175
1176 for (unsigned i = 0; i < inputs_imm1_length; i++) {
1177 for (unsigned j = 0; j < inputs_imm2_length; j++) {
1178 __ Mov(vres, vd);
1179 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
1180 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1181 }
1182 }
1183
1184 __ Add(index_n, index_n, 1);
1185 __ Cmp(index_n, inputs_n_length);
1186 __ B(lo, &loop_n);
1187
1188 END();
1189 RUN();
1190 TEARDOWN();
1191 }
1192
1193 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1194 // arrays of rawbit representation of input values. This ensures that
1195 // exact bit comparisons can be performed.
1196 template <typename Td, typename Tn>
1197 void TestOpImmOpImmNEON(const char* name,
1198 TestOpImmOpImmVdUpdateNEONHelper_t helper,
1199 const Td inputs_d[], const int inputs_imm1[],
1200 unsigned inputs_imm1_length, const Tn inputs_n[],
1201 unsigned inputs_n_length, const int inputs_imm2[],
1202 unsigned inputs_imm2_length, const Td expected[],
1203 unsigned expected_length, VectorFormat vd_form,
1204 VectorFormat vn_form) {
1205 DCHECK_GT(inputs_n_length, 0U);
1206 DCHECK_GT(inputs_imm1_length, 0U);
1207 DCHECK_GT(inputs_imm2_length, 0U);
1208
1209 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1210
1211 const unsigned results_length =
1212 inputs_n_length * inputs_imm1_length * inputs_imm2_length;
1213
1214 std::vector<Td> results(results_length * vd_lane_count, 0);
1215 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1216
1217 TestOpImmOpImmNEON_Helper(
1218 helper, reinterpret_cast<uintptr_t>(inputs_d), inputs_imm1,
1219 inputs_imm1_length, reinterpret_cast<uintptr_t>(inputs_n),
1220 inputs_n_length, inputs_imm2, inputs_imm2_length,
1221 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form);
1222
1223 // Check the results.
1224 CHECK(expected_length == results_length);
1225 unsigned error_count = 0;
1226 unsigned counted_length = 0;
1227 const char* padding = " ";
1228 DCHECK(strlen(padding) >= (lane_len_in_hex + 1));
1229 for (unsigned n = 0; n < inputs_n_length; n++) {
1230 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
1231 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
1232 bool error_in_vector = false;
1233
1234 counted_length++;
1235
1236 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1237 unsigned output_index =
1238 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
1239 (imm1 * inputs_imm2_length * vd_lane_count) +
1240 (imm2 * vd_lane_count) + lane;
1241
1242 if (results[output_index] != expected[output_index]) {
1243 error_in_vector = true;
1244 break;
1245 }
1246 }
1247
1248 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1249 printf("%s\n", name);
1250 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
1251 lane_len_in_hex + 1, padding, lane_len_in_hex, padding,
1252 lane_len_in_hex + 1, padding, lane_len_in_hex, padding,
1253 lane_len_in_hex + 1, padding);
1254
1255 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1256 unsigned output_index =
1257 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
1258 (imm1 * inputs_imm2_length * vd_lane_count) +
1259 (imm2 * vd_lane_count) + lane;
1260 unsigned input_index_n =
1261 (inputs_n_length - vd_lane_count + n + 1 + lane) %
1262 inputs_n_length;
1263 unsigned input_index_imm1 = imm1;
1264 unsigned input_index_imm2 = imm2;
1265
1266 printf(
1267 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
1268 " "
1269 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1270 results[output_index] != expected[output_index] ? '*' : ' ',
1271 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),
1272 lane_len_in_hex,
1273 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
1274 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
1275 lane_len_in_hex,
1276 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
1277 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
1278 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));
1279 }
1280 }
1281 }
1282 }
1283 }
1284 DCHECK_EQ(counted_length, expected_length);
1285 if (error_count > kErrorReportLimit) {
1286 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1287 }
1288 CHECK(error_count == 0);
1289 }
1290
1291 } // anonymous namespace
1292
1293 // ==== NEON Tests. ====
1294
// Dispatch macros. Each one builds the test label, takes the address of the
// MacroAssembler member named after the instruction, computes the input array
// lengths with sizeof, selects the matching kExpected_NEON_* and
// kExpectedCount_NEON_* reference data by token pasting, and forwards
// everything to the corresponding Test*NEON() driver.

// One source operand; reference data is keyed by <insn>_<vd_fmt>.
#define CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, n_values) \
  Test1OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
              &MacroAssembler::insn, n_values, \
              (sizeof(n_values) / sizeof(n_values[0])), \
              kExpected_NEON_##insn##_##vd_fmt, \
              kExpectedCount_NEON_##insn##_##vd_fmt, kFormat##vd_fmt, \
              kFormat##vn_fmt)

// One source operand, across lanes; reference data is keyed by
// <insn>_<vd_fmt>_<vn_fmt>.
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, n_values) \
  Test1OpAcrossNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt), \
      &MacroAssembler::insn, n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt, kFormat##vd_fmt, \
      kFormat##vn_fmt)

// Two source operands plus destination inputs; reference data is keyed by
// <insn>_<vd_fmt>.
#define CALL_TEST_NEON_HELPER_2Op(insn, vd_fmt, vn_fmt, vm_fmt, d_values, \
                                  n_values, m_values) \
  Test2OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
              &MacroAssembler::insn, d_values, n_values, \
              (sizeof(n_values) / sizeof(n_values[0])), m_values, \
              (sizeof(m_values) / sizeof(m_values[0])), \
              kExpected_NEON_##insn##_##vd_fmt, \
              kExpectedCount_NEON_##insn##_##vd_fmt, kFormat##vd_fmt, \
              kFormat##vn_fmt, kFormat##vm_fmt)

// One source operand and an immediate; reference data is keyed by
// <insn>_<vd_fmt>_2OPIMM.
#define CALL_TEST_NEON_HELPER_2OpImm(insn, vd_fmt, vn_fmt, n_values, \
                                     m_values) \
  Test2OpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_2OPIMM", \
                 &MacroAssembler::insn, n_values, \
                 (sizeof(n_values) / sizeof(n_values[0])), m_values, \
                 (sizeof(m_values) / sizeof(m_values[0])), \
                 kExpected_NEON_##insn##_##vd_fmt##_2OPIMM, \
                 kExpectedCount_NEON_##insn##_##vd_fmt##_2OPIMM, \
                 kFormat##vd_fmt, kFormat##vn_fmt)

// By-element form; reference data is keyed by
// <insn>_<vd_fmt>_<vn_fmt>_<vm_fmt>.
#define CALL_TEST_NEON_HELPER_ByElement(insn, vd_fmt, vn_fmt, vm_fmt, \
                                        d_values, n_values, m_values, \
                                        lane_indices) \
  TestByElementNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY( \
          vn_fmt) "_" STRINGIFY(vm_fmt), \
      &MacroAssembler::insn, d_values, n_values, \
      (sizeof(n_values) / sizeof(n_values[0])), m_values, \
      (sizeof(m_values) / sizeof(m_values[0])), lane_indices, \
      (sizeof(lane_indices) / sizeof(lane_indices[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kFormat##vd_fmt, kFormat##vn_fmt, kFormat##vm_fmt)

// Two immediates; the code-emitting callback is passed in explicitly instead
// of being derived from the instruction name. Reference data is keyed by
// <insn>_<vd_fmt>.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(emit_fn, insn, vd_fmt, vn_fmt, \
                                         d_values, imm1_values, n_values, \
                                         imm2_values) \
  TestOpImmOpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt), emit_fn, \
                     d_values, imm1_values, \
                     (sizeof(imm1_values) / sizeof(imm1_values[0])), \
                     n_values, (sizeof(n_values) / sizeof(n_values[0])), \
                     imm2_values, \
                     (sizeof(imm2_values) / sizeof(imm2_values[0])), \
                     kExpected_NEON_##insn##_##vd_fmt, \
                     kExpectedCount_NEON_##insn##_##vd_fmt, \
                     kFormat##vd_fmt, kFormat##vn_fmt)
1356
// "2SAME" tests: one source operand whose format equals the destination's.
// 'tag' is the suffix that selects the kInput<width>##tag input array.
#define CALL_TEST_NEON_HELPER_2SAME(insn, fmt, data) \
  CALL_TEST_NEON_HELPER_1Op(insn, fmt, fmt, data)

// Byte-lane vector forms.
#define DEFINE_TEST_NEON_2SAME_8B_16B(insn, tag) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8B, kInput8bits##tag); \
  } \
  SIMTEST(insn##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 16B, kInput8bits##tag); \
  }

// Halfword-lane vector forms.
#define DEFINE_TEST_NEON_2SAME_4H_8H(insn, tag) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8H, kInput16bits##tag); \
  }

// Word-lane vector forms.
#define DEFINE_TEST_NEON_2SAME_2S_4S(insn, tag) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInput32bits##tag); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInput32bits##tag); \
  }

// Byte and halfword vector forms.
#define DEFINE_TEST_NEON_2SAME_BH(insn, tag) \
  DEFINE_TEST_NEON_2SAME_8B_16B(insn, tag) \
  DEFINE_TEST_NEON_2SAME_4H_8H(insn, tag)

// Every integer vector form except 2D.
#define DEFINE_TEST_NEON_2SAME_NO2D(insn, tag) \
  DEFINE_TEST_NEON_2SAME_BH(insn, tag) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, tag)

// Every integer vector form, including 2D.
#define DEFINE_TEST_NEON_2SAME(insn, tag) \
  DEFINE_TEST_NEON_2SAME_NO2D(insn, tag) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##tag); \
  }

// Word and doubleword integer vector forms only.
#define DEFINE_TEST_NEON_2SAME_SD(insn, tag) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, tag) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##tag); \
  }

// Vector forms fed from the kInputFloat / kInputDouble arrays.
#define DEFINE_TEST_NEON_2SAME_FP(insn, tag) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInputFloat##tag); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInputFloat##tag); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInputDouble##tag); \
  }

// Scalar forms fed from the kInputFloat / kInputDouble arrays.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(insn, tag) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInputFloat##tag); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInputDouble##tag); \
  }

// Integer scalar forms, one macro per lane size.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, tag) \
  SIMTEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, B, kInput8bits##tag); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, tag) \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, H, kInput16bits##tag); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, tag) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInput32bits##tag); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, tag) \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInput64bits##tag); \
  }

// All four integer scalar forms.
#define DEFINE_TEST_NEON_2SAME_SCALAR(insn, tag) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, tag) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, tag) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, tag) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, tag)

// Word and doubleword integer scalar forms only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(insn, tag) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, tag) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, tag)
1448
// "ACROSS" tests: a vector source operand with a scalar destination.
// 'tag' is the suffix that selects the kInput<width>##tag input array.
#define CALL_TEST_NEON_HELPER_ACROSS(insn, vd_fmt, vn_fmt, n_values) \
  CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, n_values)

// Destination lane size equals the source lane size.
#define DEFINE_TEST_NEON_ACROSS(insn, tag) \
  SIMTEST(insn##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 8B, kInput8bits##tag); \
  } \
  SIMTEST(insn##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 16B, kInput8bits##tag); \
  } \
  SIMTEST(insn##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 4H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInput32bits##tag); \
  }

// Destination lane size is twice the source lane size.
#define DEFINE_TEST_NEON_ACROSS_LONG(insn, tag) \
  SIMTEST(insn##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8B, kInput8bits##tag); \
  } \
  SIMTEST(insn##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 16B, kInput8bits##tag); \
  } \
  SIMTEST(insn##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 8H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, D, 4S, kInput32bits##tag); \
  }

// Single S <- 4S form fed from the kInputFloat arrays.
#define DEFINE_TEST_NEON_ACROSS_FP(insn, tag) \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInputFloat##tag); \
  }
1490
// "2DIFF" tests: one source operand whose format differs from the
// destination's. 'tag' is the suffix that selects the input array. Macros
// that paste "2" onto the instruction name also test the <insn>2 variant.
#define CALL_TEST_NEON_HELPER_2DIFF(insn, vd_fmt, vn_fmt, n_values) \
  CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, n_values)

// Destination lanes are twice as wide as the source lanes.
#define DEFINE_TEST_NEON_2DIFF_LONG(insn, tag) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 8B, kInput8bits##tag); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8H, 16B, kInput8bits##tag); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 4H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 8H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 1D, 2S, kInput32bits##tag); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 4S, kInput32bits##tag); \
  }

// Destination lanes are half as wide as the source lanes.
#define DEFINE_TEST_NEON_2DIFF_NARROW(insn, tag) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8B, 8H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInput32bits##tag); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInput64bits##tag); \
  } \
  SIMTEST(insn##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 16B, 8H, kInput16bits##tag); \
  } \
  SIMTEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInput32bits##tag); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInput64bits##tag); \
  }

// Lengthening forms fed from the kInputFloat16 / kInputFloat arrays.
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(insn, tag) \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 4H, kInputFloat16##tag); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 2S, kInputFloat##tag); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 8H, kInputFloat16##tag); \
  } \
  SIMTEST(insn##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 2D, 4S, kInputFloat##tag); \
  }

// Narrowing forms fed from the kInputFloat / kInputDouble arrays.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(insn, tag) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInputFloat##tag); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInputDouble##tag); \
  } \
  SIMTEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInputFloat##tag); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInputDouble##tag); \
  }

// Narrowing forms with a 2D source only, fed from the kInputDouble arrays.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(insn, tag) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInputDouble##tag); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInputDouble##tag); \
  }

// Scalar narrowing forms.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(insn, tag) \
  SIMTEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, B, H, kInput16bits##tag); \
  } \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, H, S, kInput32bits##tag); \
  } \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, S, D, kInput64bits##tag); \
  }

// Scalar destination with a two-lane vector source, fed from the
// kInputFloat / kInputDouble arrays.
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(insn, tag) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, S, 2S, kInputFloat##tag); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, D, 2D, kInputDouble##tag); \
  }
1588
// "3SAME" tests: destination and both source operands share one format, and
// the same input array is used for Vn and Vm. The destination inputs come
// from the kInput*AccDestination arrays; 'tag' is the suffix that selects the
// source input array.
#define CALL_TEST_NEON_HELPER_3SAME(insn, fmt, d_values, nm_values) \
  { \
    CALL_TEST_NEON_HELPER_2Op(insn, fmt, fmt, fmt, d_values, nm_values, \
                              nm_values); \
  }

// Byte-lane vector forms.
#define DEFINE_TEST_NEON_3SAME_8B_16B(insn, tag) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 8B, kInput8bitsAccDestination, \
                                kInput8bits##tag); \
  } \
  SIMTEST(insn##_16B) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 16B, kInput8bitsAccDestination, \
                                kInput8bits##tag); \
  }

// Halfword- and word-lane vector forms.
#define DEFINE_TEST_NEON_3SAME_HS(insn, tag) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 4H, kInput16bitsAccDestination, \
                                kInput16bits##tag); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 8H, kInput16bitsAccDestination, \
                                kInput16bits##tag); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2S, kInput32bitsAccDestination, \
                                kInput32bits##tag); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 4S, kInput32bitsAccDestination, \
                                kInput32bits##tag); \
  }

// Every integer vector form except 2D.
#define DEFINE_TEST_NEON_3SAME_NO2D(insn, tag) \
  DEFINE_TEST_NEON_3SAME_8B_16B(insn, tag) \
  DEFINE_TEST_NEON_3SAME_HS(insn, tag)

// Every integer vector form, including 2D.
#define DEFINE_TEST_NEON_3SAME(insn, tag) \
  DEFINE_TEST_NEON_3SAME_NO2D(insn, tag) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2D, kInput64bitsAccDestination, \
                                kInput64bits##tag); \
  }

// Vector forms fed from the kInputFloat / kInputDouble arrays.
#define DEFINE_TEST_NEON_3SAME_FP(insn, tag) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2S, kInputFloatAccDestination, \
                                kInputFloat##tag); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 4S, kInputFloatAccDestination, \
                                kInputFloat##tag); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2D, kInputDoubleAccDestination, \
                                kInputDouble##tag); \
  }

// Doubleword integer scalar form only.
#define DEFINE_TEST_NEON_3SAME_SCALAR_D(insn, tag) \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, D, kInput64bitsAccDestination, \
                                kInput64bits##tag); \
  }

// Halfword and word integer scalar forms only.
#define DEFINE_TEST_NEON_3SAME_SCALAR_HS(insn, tag) \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, H, kInput16bitsAccDestination, \
                                kInput16bits##tag); \
  } \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, S, kInput32bitsAccDestination, \
                                kInput32bits##tag); \
  }

// All four integer scalar forms.
#define DEFINE_TEST_NEON_3SAME_SCALAR(insn, tag) \
  SIMTEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, B, kInput8bitsAccDestination, \
                                kInput8bits##tag); \
  } \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, H, kInput16bitsAccDestination, \
                                kInput16bits##tag); \
  } \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, S, kInput32bitsAccDestination, \
                                kInput32bits##tag); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, D, kInput64bitsAccDestination, \
                                kInput64bits##tag); \
  }

// Scalar forms fed from the kInputFloat / kInputDouble arrays.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(insn, tag) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, S, kInputFloatAccDestination, \
                                kInputFloat##tag); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, D, kInputDoubleAccDestination, \
                                kInputDouble##tag); \
  }
1691
1692 #define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, vdform, vnform, vmform, input_d, \
1693 input_n, input_m) \
1694 { \
1695 CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, input_d, \
1696 input_n, input_m); \
1697 }
1698
// Defines <mnemonic>_8H (forms 8H, 8B, 8B) and <mnemonic>2_8H, which tests
// the <mnemonic>2 instruction with forms 8H, 16B, 16B. Both use the 16-bit
// accumulator destination list and the 8-bit <input> list for both sources.
1699 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
1700 SIMTEST(mnemonic##_8H) { \
1701 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
1702 kInput16bitsAccDestination, \
1703 kInput8bits##input, kInput8bits##input); \
1704 } \
1705 SIMTEST(mnemonic##2_8H) { \
1706 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
1707 kInput16bitsAccDestination, \
1708 kInput8bits##input, kInput8bits##input); \
1709 }
1710
// Defines <mnemonic>_4S (forms 4S, 4H, 4H) and <mnemonic>2_4S, which tests
// the <mnemonic>2 instruction with forms 4S, 8H, 8H. Both use the 32-bit
// accumulator destination list and the 16-bit <input> list for both sources.
1711 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
1712 SIMTEST(mnemonic##_4S) { \
1713 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
1714 kInput32bitsAccDestination, \
1715 kInput16bits##input, kInput16bits##input); \
1716 } \
1717 SIMTEST(mnemonic##2_4S) { \
1718 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
1719 kInput32bitsAccDestination, \
1720 kInput16bits##input, kInput16bits##input); \
1721 }
1722
// Defines <mnemonic>_2D (forms 2D, 2S, 2S) and <mnemonic>2_2D, which tests
// the <mnemonic>2 instruction with forms 2D, 4S, 4S. Both use the 64-bit
// accumulator destination list and the 32-bit <input> list for both sources.
1723 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
1724 SIMTEST(mnemonic##_2D) { \
1725 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
1726 kInput64bitsAccDestination, \
1727 kInput32bits##input, kInput32bits##input); \
1728 } \
1729 SIMTEST(mnemonic##2_2D) { \
1730 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
1731 kInput64bitsAccDestination, \
1732 kInput32bits##input, kInput32bits##input); \
1733 }
1734
// Expands to the 4S and 2D long-test groups only (no 8H destination tests).
1735 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
1736 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
1737 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
1738
// Expands to all three long-test groups: 8H, 4S and 2D destinations, each
// with its <mnemonic> and <mnemonic>2 test (six SIMTESTs in total).
1739 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
1740 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
1741 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
1742 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
1743
// Defines <mnemonic>_S: scalar long test with an S destination and two H
// sources, using the 32-bit accumulator destination list and the 16-bit
// <input> list for both sources.
1744 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
1745 SIMTEST(mnemonic##_S) { \
1746 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, kInput32bitsAccDestination, \
1747 kInput16bits##input, kInput16bits##input); \
1748 }
1749
// Defines <mnemonic>_D: scalar long test with a D destination and two S
// sources, using the 64-bit accumulator destination list and the 32-bit
// <input> list for both sources.
1750 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
1751 SIMTEST(mnemonic##_D) { \
1752 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, kInput64bitsAccDestination, \
1753 kInput32bits##input, kInput32bits##input); \
1754 }
1755
// Expands to both scalar long tests: <mnemonic>_S and <mnemonic>_D.
1756 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
1757 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
1758 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
1759
// Defines six "wide" SIMTESTs: <mnemonic>_{8H,4S,2D} and
// <mnemonic>2_{8H,4S,2D}. In every test the first source has the same form
// and element width as the destination, while the second source uses the
// narrower form (8B/4H/2S, or 16B/8H/4S for the <mnemonic>2 tests) and the
// correspondingly narrower <input> list.
1760 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
1761 SIMTEST(mnemonic##_8H) { \
1762 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \
1763 kInput16bitsAccDestination, \
1764 kInput16bits##input, kInput8bits##input); \
1765 } \
1766 SIMTEST(mnemonic##_4S) { \
1767 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \
1768 kInput32bitsAccDestination, \
1769 kInput32bits##input, kInput16bits##input); \
1770 } \
1771 SIMTEST(mnemonic##_2D) { \
1772 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \
1773 kInput64bitsAccDestination, \
1774 kInput64bits##input, kInput32bits##input); \
1775 } \
1776 SIMTEST(mnemonic##2_8H) { \
1777 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \
1778 kInput16bitsAccDestination, \
1779 kInput16bits##input, kInput8bits##input); \
1780 } \
1781 SIMTEST(mnemonic##2_4S) { \
1782 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \
1783 kInput32bitsAccDestination, \
1784 kInput32bits##input, kInput16bits##input); \
1785 } \
1786 SIMTEST(mnemonic##2_2D) { \
1787 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \
1788 kInput64bitsAccDestination, \
1789 kInput64bits##input, kInput32bits##input); \
1790 }
1791
// Defines six "narrow" SIMTESTs: <mnemonic>_{8B,4H,2S} and
// <mnemonic>2_{16B,8H,4S}. Tests are named after the destination form; both
// sources use the form with elements twice as wide (8H/4S/2D) and the
// matching wider <input> list, while the destination uses the narrower
// accumulator destination list.
1792 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
1793 SIMTEST(mnemonic##_8B) { \
1794 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \
1795 kInput8bitsAccDestination, \
1796 kInput16bits##input, kInput16bits##input); \
1797 } \
1798 SIMTEST(mnemonic##_4H) { \
1799 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \
1800 kInput16bitsAccDestination, \
1801 kInput32bits##input, kInput32bits##input); \
1802 } \
1803 SIMTEST(mnemonic##_2S) { \
1804 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \
1805 kInput32bitsAccDestination, \
1806 kInput64bits##input, kInput64bits##input); \
1807 } \
1808 SIMTEST(mnemonic##2_16B) { \
1809 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \
1810 kInput8bitsAccDestination, \
1811 kInput16bits##input, kInput16bits##input); \
1812 } \
1813 SIMTEST(mnemonic##2_8H) { \
1814 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \
1815 kInput16bitsAccDestination, \
1816 kInput32bits##input, kInput32bits##input); \
1817 } \
1818 SIMTEST(mnemonic##2_4S) { \
1819 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \
1820 kInput32bitsAccDestination, \
1821 kInput64bits##input, kInput64bits##input); \
1822 }
1823
// Thin wrapper for "two operands plus immediate" tests: forwards all five
// arguments unchanged to CALL_TEST_NEON_HELPER_2OpImm inside a braced block.
// Because the expansion is a complete { ... } block, a trailing ';' after an
// invocation is optional.
1824 #define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, vdform, vnform, input_n, \
1825 input_imm) \
1826 { \
1827 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, input_n, \
1828 input_imm); \
1829 }
1830
// Defines seven SIMTESTs, <mnemonic>_{8B,16B,4H,8H,2S,4S,2D}_2OPIMM.
// Destination and source share the same form; the source list is
// kInput<N>bits<input> and the immediate list is kInput<N>bitsImm<input_imm>,
// with N equal to the element width of the form.
1831 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
1832 SIMTEST(mnemonic##_8B_2OPIMM) { \
1833 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, kInput8bits##input, \
1834 kInput8bitsImm##input_imm); \
1835 } \
1836 SIMTEST(mnemonic##_16B_2OPIMM) { \
1837 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, kInput8bits##input, \
1838 kInput8bitsImm##input_imm); \
1839 } \
1840 SIMTEST(mnemonic##_4H_2OPIMM) { \
1841 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, \
1842 kInput16bitsImm##input_imm); \
1843 } \
1844 SIMTEST(mnemonic##_8H_2OPIMM) { \
1845 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, \
1846 kInput16bitsImm##input_imm); \
1847 } \
1848 SIMTEST(mnemonic##_2S_2OPIMM) { \
1849 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
1850 kInput32bitsImm##input_imm); \
1851 } \
1852 SIMTEST(mnemonic##_4S_2OPIMM) { \
1853 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
1854 kInput32bitsImm##input_imm); \
1855 } \
1856 SIMTEST(mnemonic##_2D_2OPIMM) { \
1857 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
1858 kInput64bitsImm##input_imm); \
1859 }
1860
// Same seven test names as DEFINE_TEST_NEON_2OPIMM, but the source operand
// uses the scalar form (B, H, S or D) whose width matches the destination's
// vector element. Do not combine with DEFINE_TEST_NEON_2OPIMM for the same
// mnemonic: the generated SIMTEST names would collide.
1861 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
1862 SIMTEST(mnemonic##_8B_2OPIMM) { \
1863 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, kInput8bits##input, \
1864 kInput8bitsImm##input_imm); \
1865 } \
1866 SIMTEST(mnemonic##_16B_2OPIMM) { \
1867 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, kInput8bits##input, \
1868 kInput8bitsImm##input_imm); \
1869 } \
1870 SIMTEST(mnemonic##_4H_2OPIMM) { \
1871 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, kInput16bits##input, \
1872 kInput16bitsImm##input_imm); \
1873 } \
1874 SIMTEST(mnemonic##_8H_2OPIMM) { \
1875 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, kInput16bits##input, \
1876 kInput16bitsImm##input_imm); \
1877 } \
1878 SIMTEST(mnemonic##_2S_2OPIMM) { \
1879 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, kInput32bits##input, \
1880 kInput32bitsImm##input_imm); \
1881 } \
1882 SIMTEST(mnemonic##_4S_2OPIMM) { \
1883 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, kInput32bits##input, \
1884 kInput32bitsImm##input_imm); \
1885 } \
1886 SIMTEST(mnemonic##_2D_2OPIMM) { \
1887 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, kInput64bits##input, \
1888 kInput64bitsImm##input_imm); \
1889 }
1890
// Defines six narrowing immediate tests: <mnemonic>_{8B,4H,2S}_2OPIMM and
// <mnemonic>2_{16B,8H,4S}_2OPIMM. The source form and <input> list use
// elements twice as wide as the destination; the immediate list is selected
// by the destination (narrow) element width.
1891 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
1892 SIMTEST(mnemonic##_8B_2OPIMM) { \
1893 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H, kInput16bits##input, \
1894 kInput8bitsImm##input_imm); \
1895 } \
1896 SIMTEST(mnemonic##_4H_2OPIMM) { \
1897 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S, kInput32bits##input, \
1898 kInput16bitsImm##input_imm); \
1899 } \
1900 SIMTEST(mnemonic##_2S_2OPIMM) { \
1901 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D, kInput64bits##input, \
1902 kInput32bitsImm##input_imm); \
1903 } \
1904 SIMTEST(mnemonic##2_16B_2OPIMM) { \
1905 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H, kInput16bits##input, \
1906 kInput8bitsImm##input_imm); \
1907 } \
1908 SIMTEST(mnemonic##2_8H_2OPIMM) { \
1909 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S, kInput32bits##input, \
1910 kInput16bitsImm##input_imm); \
1911 } \
1912 SIMTEST(mnemonic##2_4S_2OPIMM) { \
1913 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D, kInput64bits##input, \
1914 kInput32bitsImm##input_imm); \
1915 }
1916
// Scalar narrowing immediate tests: <mnemonic>_{B,H,S}_2OPIMM. The source is
// the next wider scalar form (H, S, D) with the matching wider <input> list;
// the immediate list is selected by the destination width.
1917 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
1918 SIMTEST(mnemonic##_B_2OPIMM) { \
1919 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H, kInput16bits##input, \
1920 kInput8bitsImm##input_imm); \
1921 } \
1922 SIMTEST(mnemonic##_H_2OPIMM) { \
1923 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S, kInput32bits##input, \
1924 kInput16bitsImm##input_imm); \
1925 } \
1926 SIMTEST(mnemonic##_S_2OPIMM) { \
1927 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D, kInput64bits##input, \
1928 kInput32bitsImm##input_imm); \
1929 }
1930
// Defines <mnemonic>_{2S,4S,2D}_2OPIMM for the floating-point compare tests
// that take an immediate operand. The 2S and 4S tests read the
// kInputFloat<input> list and the 2D test reads kInputDouble<input>; all
// three take their immediates from kInputDoubleImm<input_imm>.
// Every test pastes `input` into the list name, so the caller's choice of
// input set applies uniformly to all three arrangements.
1931 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
1932 SIMTEST(mnemonic##_2S_2OPIMM) { \
1933 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##input, \
1934 kInputDoubleImm##input_imm); \
1935 } \
1936 SIMTEST(mnemonic##_4S_2OPIMM) { \
1937 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \
1938 kInputDoubleImm##input_imm); \
1939 } \
1940 SIMTEST(mnemonic##_2D_2OPIMM) { \
1941 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \
1942 kInputDoubleImm##input_imm); \
1943 }
1944
// Defines <mnemonic>_{2S,4S,2D}_2OPIMM for floating-point sources with
// integer-width immediate lists (kInput32bitsImm* for 2S/4S, kInput64bitsImm*
// for 2D).
// NOTE(review): the 2S test pastes the literal token `Basic`
// (kInputFloatBasic) and ignores `input`, unlike the 4S and 2D tests.
// Switching it to `input` would change that test's inputs whenever a caller
// passes something other than Basic; presumably the recorded expected traces
// would then need regenerating - confirm before changing.
// The helper invocations omit the trailing ';', which is harmless because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
1945 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
1946 SIMTEST(mnemonic##_2S_2OPIMM) { \
1947 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \
1948 kInput32bitsImm##input_imm) \
1949 } \
1950 SIMTEST(mnemonic##_4S_2OPIMM) { \
1951 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \
1952 kInput32bitsImm##input_imm) \
1953 } \
1954 SIMTEST(mnemonic##_2D_2OPIMM) { \
1955 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \
1956 kInput64bitsImm##input_imm) \
1957 }
1958
// Scalar variant of DEFINE_TEST_NEON_2OPIMM_FP: defines <mnemonic>_S_2OPIMM
// and <mnemonic>_D_2OPIMM with kInput32bitsImm* / kInput64bitsImm* immediates.
// NOTE(review): the S test pastes the literal token `Basic`
// (kInputFloatBasic) and ignores `input`; only the D test honours it.
// Changing this would alter the S test's inputs for callers that pass a
// different input set - confirm against the expected traces first.
1959 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
1960 SIMTEST(mnemonic##_S_2OPIMM) { \
1961 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##Basic, \
1962 kInput32bitsImm##input_imm) \
1963 } \
1964 SIMTEST(mnemonic##_D_2OPIMM) { \
1965 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
1966 kInput64bitsImm##input_imm) \
1967 }
1968
// Subset of DEFINE_TEST_NEON_2OPIMM restricted to 32- and 64-bit elements:
// defines <mnemonic>_{2S,4S,2D}_2OPIMM using the integer kInput<N>bits<input>
// and kInput<N>bitsImm<input_imm> lists.
1969 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
1970 SIMTEST(mnemonic##_2S_2OPIMM) { \
1971 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
1972 kInput32bitsImm##input_imm); \
1973 } \
1974 SIMTEST(mnemonic##_4S_2OPIMM) { \
1975 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
1976 kInput32bitsImm##input_imm); \
1977 } \
1978 SIMTEST(mnemonic##_2D_2OPIMM) { \
1979 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
1980 kInput64bitsImm##input_imm); \
1981 }
1982
// Defines the single test <mnemonic>_D_2OPIMM: D-form destination and source
// with the 64-bit integer input and immediate lists.
1983 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
1984 SIMTEST(mnemonic##_D_2OPIMM) { \
1985 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, \
1986 kInput64bitsImm##input_imm); \
1987 }
1988
// Defines <mnemonic>_S_2OPIMM (32-bit integer lists) and then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D to add <mnemonic>_D_2OPIMM.
1989 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
1990 SIMTEST(mnemonic##_S_2OPIMM) { \
1991 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, \
1992 kInput32bitsImm##input_imm); \
1993 } \
1994 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
1995
// Defines the single test <mnemonic>_D_2OPIMM for floating-point operands:
// kInputDouble<input> as the source list and kInputDoubleImm<input_imm> as
// the immediate list.
1996 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
1997 SIMTEST(mnemonic##_D_2OPIMM) { \
1998 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
1999 kInputDoubleImm##input_imm); \
2000 }
2001
// Defines <mnemonic>_S_2OPIMM (kInputFloat<input> source) and then expands
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D to add <mnemonic>_D_2OPIMM. Both tests
// take their immediates from kInputDoubleImm<input_imm>.
2002 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
2003 SIMTEST(mnemonic##_S_2OPIMM) { \
2004 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, \
2005 kInputDoubleImm##input_imm); \
2006 } \
2007 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
2008
// Defines the full scalar set <mnemonic>_{B,H,S,D}_2OPIMM: the B and H tests
// are written out here, the S and D tests come from
// DEFINE_TEST_NEON_2OPIMM_SCALAR_SD.
2009 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
2010 SIMTEST(mnemonic##_B_2OPIMM) { \
2011 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, \
2012 kInput8bitsImm##input_imm); \
2013 } \
2014 SIMTEST(mnemonic##_H_2OPIMM) { \
2015 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, \
2016 kInput16bitsImm##input_imm); \
2017 } \
2018 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
2019
// Defines six lengthening immediate tests: <mnemonic>_{8H,4S,2D}_2OPIMM and
// <mnemonic>2_{8H,4S,2D}_2OPIMM. Tests are named after the destination form;
// the source uses the form with elements half as wide, and both the <input>
// list and the immediate list are selected by the source element width.
2020 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
2021 SIMTEST(mnemonic##_8H_2OPIMM) { \
2022 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, \
2023 kInput8bitsImm##input_imm); \
2024 } \
2025 SIMTEST(mnemonic##_4S_2OPIMM) { \
2026 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, \
2027 kInput16bitsImm##input_imm); \
2028 } \
2029 SIMTEST(mnemonic##_2D_2OPIMM) { \
2030 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, \
2031 kInput32bitsImm##input_imm); \
2032 } \
2033 SIMTEST(mnemonic##2_8H_2OPIMM) { \
2034 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, \
2035 kInput8bitsImm##input_imm); \
2036 } \
2037 SIMTEST(mnemonic##2_4S_2OPIMM) { \
2038 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, \
2039 kInput16bitsImm##input_imm); \
2040 } \
2041 SIMTEST(mnemonic##2_2D_2OPIMM) { \
2042 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, \
2043 kInput32bitsImm##input_imm); \
2044 }
2045
// Thin wrapper for "by element" tests: forwards all eight arguments unchanged
// to CALL_TEST_NEON_HELPER_ByElement inside a braced block.
//   vdform/vnform/vmform: forms of the destination and the two sources.
//   input_d/input_n/input_m: input lists for the same three operands.
//   indices: list of element indices passed through to the helper.
2046 #define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, vdform, vnform, vmform, \
2047 input_d, input_n, input_m, indices) \
2048 { \
2049 CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, input_d, \
2050 input_n, input_m, indices); \
2051 }
2052
// Defines four integer by-element tests named <mnemonic>_<vd>_<vn>_<vm>:
// 4H_4H_H, 8H_8H_H, 2S_2S_S and 4S_4S_S. The three operands take independent
// input-set names (input_d, input_n, input_m), each pasted onto kInput16bits
// or kInput32bits; H tests use kInputHIndices and S tests kInputSIndices.
2053 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
2054 SIMTEST(mnemonic##_4H_4H_H) { \
2055 CALL_TEST_NEON_HELPER_BYELEMENT( \
2056 mnemonic, 4H, 4H, H, kInput16bits##input_d, kInput16bits##input_n, \
2057 kInput16bits##input_m, kInputHIndices); \
2058 } \
2059 SIMTEST(mnemonic##_8H_8H_H) { \
2060 CALL_TEST_NEON_HELPER_BYELEMENT( \
2061 mnemonic, 8H, 8H, H, kInput16bits##input_d, kInput16bits##input_n, \
2062 kInput16bits##input_m, kInputHIndices); \
2063 } \
2064 SIMTEST(mnemonic##_2S_2S_S) { \
2065 CALL_TEST_NEON_HELPER_BYELEMENT( \
2066 mnemonic, 2S, 2S, S, kInput32bits##input_d, kInput32bits##input_n, \
2067 kInput32bits##input_m, kInputSIndices); \
2068 } \
2069 SIMTEST(mnemonic##_4S_4S_S) { \
2070 CALL_TEST_NEON_HELPER_BYELEMENT( \
2071 mnemonic, 4S, 4S, S, kInput32bits##input_d, kInput32bits##input_n, \
2072 kInput32bits##input_m, kInputSIndices); \
2073 }
2074
// Scalar integer by-element tests: <mnemonic>_H_H_H (16-bit lists,
// kInputHIndices) and <mnemonic>_S_S_S (32-bit lists, kInputSIndices).
2075 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
2076 SIMTEST(mnemonic##_H_H_H) { \
2077 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInput16bits##input_d, \
2078 kInput16bits##input_n, \
2079 kInput16bits##input_m, kInputHIndices); \
2080 } \
2081 SIMTEST(mnemonic##_S_S_S) { \
2082 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInput32bits##input_d, \
2083 kInput32bits##input_n, \
2084 kInput32bits##input_m, kInputSIndices); \
2085 }
2086
// Floating-point by-element tests: <mnemonic>_2S_2S_S and _4S_4S_S use the
// kInputFloat* lists with kInputSIndices; <mnemonic>_2D_2D_D uses the
// kInputDouble* lists with kInputDIndices.
2087 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
2088 SIMTEST(mnemonic##_2S_2S_S) { \
2089 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInputFloat##input_d, \
2090 kInputFloat##input_n, \
2091 kInputFloat##input_m, kInputSIndices); \
2092 } \
2093 SIMTEST(mnemonic##_4S_4S_S) { \
2094 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInputFloat##input_d, \
2095 kInputFloat##input_n, \
2096 kInputFloat##input_m, kInputSIndices); \
2097 } \
2098 SIMTEST(mnemonic##_2D_2D_D) { \
2099 CALL_TEST_NEON_HELPER_BYELEMENT( \
2100 mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, \
2101 kInputDouble##input_m, kInputDIndices); \
2102 }
2103
// Scalar floating-point by-element tests: <mnemonic>_S_S_S (kInputFloat*
// lists, kInputSIndices) and <mnemonic>_D_D_D (kInputDouble* lists,
// kInputDIndices). The parameters are abbreviated to inp_d/inp_n/inp_m here
// but play the same role as input_d/input_n/input_m in the sibling macros.
2104 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
2105 SIMTEST(mnemonic##_S_S_S) { \
2106 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInputFloat##inp_d, \
2107 kInputFloat##inp_n, kInputFloat##inp_m, \
2108 kInputSIndices); \
2109 } \
2110 SIMTEST(mnemonic##_D_D_D) { \
2111 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, kInputDouble##inp_d, \
2112 kInputDouble##inp_n, kInputDouble##inp_m, \
2113 kInputDIndices); \
2114 }
2115
// By-element tests whose destination elements are twice as wide as the
// sources: <mnemonic>_4S_4H_H, <mnemonic>2_4S_8H_H, <mnemonic>_2D_2S_S and
// <mnemonic>2_2D_4S_S. The destination list uses the wider width (32/64 bits)
// and both source lists the narrower width (16/32 bits); indices follow the
// source element size (kInputHIndices or kInputSIndices).
2116 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
2117 SIMTEST(mnemonic##_4S_4H_H) { \
2118 CALL_TEST_NEON_HELPER_BYELEMENT( \
2119 mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, \
2120 kInput16bits##input_m, kInputHIndices); \
2121 } \
2122 SIMTEST(mnemonic##2_4S_8H_H) { \
2123 CALL_TEST_NEON_HELPER_BYELEMENT( \
2124 mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, \
2125 kInput16bits##input_m, kInputHIndices); \
2126 } \
2127 SIMTEST(mnemonic##_2D_2S_S) { \
2128 CALL_TEST_NEON_HELPER_BYELEMENT( \
2129 mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, \
2130 kInput32bits##input_m, kInputSIndices); \
2131 } \
2132 SIMTEST(mnemonic##2_2D_4S_S) { \
2133 CALL_TEST_NEON_HELPER_BYELEMENT( \
2134 mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, \
2135 kInput32bits##input_m, kInputSIndices); \
2136 }
2137
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT_DIFF: <mnemonic>_S_H_H
// (32-bit destination list, 16-bit source lists, kInputHIndices) and
// <mnemonic>_D_S_S (64-bit destination list, 32-bit source lists,
// kInputSIndices).
2138 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, \
2139 input_m) \
2140 SIMTEST(mnemonic##_S_H_H) { \
2141 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, kInput32bits##input_d, \
2142 kInput16bits##input_n, \
2143 kInput16bits##input_m, kInputHIndices); \
2144 } \
2145 SIMTEST(mnemonic##_D_S_S) { \
2146 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, kInput64bits##input_d, \
2147 kInput32bits##input_n, \
2148 kInput32bits##input_m, kInputSIndices); \
2149 }
2150
// Wrapper for tests with two operands and two immediates. Unlike the other
// CALL_TEST_NEON_HELPER_* wrappers in this group it passes the mnemonic twice
// (as &MacroAssembler::mnemonic and as the bare token) and uses the single
// `variant` argument for both form parameters of
// CALL_TEST_NEON_HELPER_OpImmOpImm.
2151 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant, input_d, input_imm1, \
2152 input_n, input_imm2) \
2153 { \
2154 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, \
2155 variant, variant, input_d, input_imm1, \
2156 input_n, input_imm2); \
2157 }
2158
// Defines four tests, <mnemonic>_B, _H, _S and _D. The test names carry only
// the element size, but the variants passed to the helper are the full-width
// vector forms 16B, 8H, 4S and 2D. Each test selects the
// kInput<N>bits<input_d>/<input_n> lists and the
// kInput<N>bitsImm<input_imm1>/<input_imm2> lists of the matching width.
2159 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, \
2160 input_imm2) \
2161 SIMTEST(mnemonic##_B) { \
2162 CALL_TEST_NEON_HELPER_2OP2IMM( \
2163 mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, \
2164 kInput8bits##input_n, kInput8bitsImm##input_imm2); \
2165 } \
2166 SIMTEST(mnemonic##_H) { \
2167 CALL_TEST_NEON_HELPER_2OP2IMM( \
2168 mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, \
2169 kInput16bits##input_n, kInput16bitsImm##input_imm2); \
2170 } \
2171 SIMTEST(mnemonic##_S) { \
2172 CALL_TEST_NEON_HELPER_2OP2IMM( \
2173 mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, \
2174 kInput32bits##input_n, kInput32bitsImm##input_imm2); \
2175 } \
2176 SIMTEST(mnemonic##_D) { \
2177 CALL_TEST_NEON_HELPER_2OP2IMM( \
2178 mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, \
2179 kInput64bits##input_n, kInput64bitsImm##input_imm2); \
2180 }
2181
2182 // Advanced SIMD copy.
// ins generates ins_B/_H/_S/_D; both of its immediates come from the
// kInput<N>bitsImmLaneCountFromZero lists. dup generates the seven
// dup_<form>_2OPIMM tests with a scalar-form source.
2183 DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic,
2184 LaneCountFromZero)
2185 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
2186
2187 // Advanced SIMD scalar copy.
// Generates dup_{B,H,S,D}_2OPIMM; these names do not clash with the vector
// dup_<form>_2OPIMM tests because the form suffixes differ.
2188 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
2189
2190 // Advanced SIMD three same.
// One line per instruction, all using the Basic input set. The
// DEFINE_TEST_NEON_3SAME* macros are defined earlier in this file; their
// suffixes (_NO2D, _HS, _8B_16B, _FP) presumably name the arrangements each
// one covers - confirm against the definitions.
// and_ carries a trailing underscore, presumably because `and` is an
// alternative operator token in C++ and cannot be used as an identifier.
2191 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
2192 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
2193 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
2194 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
2195 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
2196 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
2197 DEFINE_TEST_NEON_3SAME(cmge, Basic)
2198 DEFINE_TEST_NEON_3SAME(sshl, Basic)
2199 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
2200 DEFINE_TEST_NEON_3SAME(srshl, Basic)
2201 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
2202 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
2203 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
2204 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
2205 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
2206 DEFINE_TEST_NEON_3SAME(add, Basic)
2207 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
2208 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
2209 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
2210 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
2211 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
2212 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
2213 DEFINE_TEST_NEON_3SAME(addp, Basic)
2214 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
2215 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
2216 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
2217 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
2218 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
2219 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
2220 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
2221 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
2222 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
2223 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
2224 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
2225 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
2226 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
2227 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
2228 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
2229 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
2230 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
2231 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
2232 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
2233 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
2234 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
2235 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
2236 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
2237 DEFINE_TEST_NEON_3SAME(ushl, Basic)
2238 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
2239 DEFINE_TEST_NEON_3SAME(urshl, Basic)
2240 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
2241 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
2242 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
2243 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
2244 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
2245 DEFINE_TEST_NEON_3SAME(sub, Basic)
2246 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
2247 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
2248 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
2249 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
2250 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
2251 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
2252 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
2253 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
2254 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
2255 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
2256 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
2257 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
2258 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
2259 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
2260 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
2261 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
2262 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
2263 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
2264 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
2265 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
2266 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
2267 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
2268
2269 // Advanced SIMD scalar three same.
// _SCALAR generates B/H/S/D tests, _SCALAR_HS only H/S, and _FP_SCALAR the
// S/D floating-point tests. DEFINE_TEST_NEON_3SAME_SCALAR_D is defined earlier
// in this file; by its name it presumably generates only the D test - confirm.
2270 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
2271 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
2272 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
2273 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
2274 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
2275 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
2276 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
2277 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
2278 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
2279 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
2280 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
2281 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
2282 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
2283 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
2284 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
2285 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
2286 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
2287 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
2288 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
2289 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
2290 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
2291 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
2292 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
2293 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
2294 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
2295 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
2296 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
2297 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
2298 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
2299 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
2300 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
2301
2302 // Advanced SIMD three different.
// _LONG generates the 8H/4S/2D destination tests, _LONG_SD only 4S/2D,
// _LONG_8H only 8H; each also generates the matching <mnemonic>2 test.
// _WIDE and _NARROW generate six tests each (three plain, three <mnemonic>2).
2303 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
2304 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
2305 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
2306 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
2307 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
2308 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
2309 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
2310 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
2311 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
2312 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
2313 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
2314 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
2315 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
2316 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
2317 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
2318 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
2319 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
2320 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
2321 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
2322 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
2323 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
2324 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
2325 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
2326 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
2327 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
2328 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
2329
2330 // Advanced SIMD scalar three different.
// Each line generates <mnemonic>_S (S <- H, H) and <mnemonic>_D (D <- S, S).
2331 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
2332 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
2333 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
2334
2335 // Advanced SIMD scalar pairwise.
// addp is written out by hand because only the D <- 2D form is tested; the
// explicit name addp_SCALAR keeps it distinct from the vector addp tests
// generated by DEFINE_TEST_NEON_3SAME(addp, Basic).
2336 SIMTEST(addp_SCALAR) {
2337 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
2338 }
// The floating-point pairwise reductions use a macro defined earlier in this
// file; by its name it presumably generates the S and D scalar tests - confirm.
2339 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
2340 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
2341 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
2342 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
2343 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
2344
2345 // Advanced SIMD shift by immediate.
// The third argument names the immediate list (TypeWidth, TypeWidthFromZero,
// TypeWidthFromZeroToWidth); it is pasted onto kInput<N>bitsImm.
// NOTE(review): DEFINE_TEST_NEON_2OPIMM_FP pastes kInputFloatBasic for its 2S
// test regardless of the input argument, so only the 4S and 2D fcvtzs/fcvtzu
// tests actually use the Conversions lists.
2346 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
2347 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
2348 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
2349 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
2350 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
2351 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
2352 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
2353 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
2354 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
2355 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
2356 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
2357 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions,
2358 TypeWidthFromZeroToWidth)
2359 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
2360 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
2361 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
2362 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
2363 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
2364 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
2365 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
2366 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
2367 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
2368 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
2369 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
2370 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
2371 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
2372 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
2373 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions,
2374 TypeWidthFromZeroToWidth)
2375 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
2376
2377 // Advanced SIMD scalar shift by immediate.
// _SCALAR_D generates only the D test, _SCALAR the B/H/S/D tests,
// _SCALAR_NARROW the B/H/S destination tests and _SCALAR_SD the S/D tests.
// NOTE(review): DEFINE_TEST_NEON_2OPIMM_FP_SCALAR pastes kInputFloatBasic for
// its S test regardless of the input argument, so only the D fcvtzs/fcvtzu
// tests actually use the Conversions list.
2378 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
2379 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
2380 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
2381 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
2382 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
2383 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
2384 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
2385 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
2386 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions,
2387 TypeWidthFromZeroToWidth)
2388 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
2389 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
2390 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
2391 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
2392 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
2393 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
2394 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
2395 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
2396 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
2397 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
2398 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
2399 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
2400 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
2401 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions,
2402 TypeWidthFromZeroToWidth)
2403 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
2404
2405 // Advanced SIMD two-register miscellaneous.
// The DEFINE_TEST_NEON_2SAME* and DEFINE_TEST_NEON_2DIFF* macros are defined
// earlier in this file. The compare-with-zero instructions (cmgt, cmeq, cmlt,
// cmge, cmle and their fcm* counterparts) reuse the 2OPIMM macros with the
// Zero immediate list. not_ carries a trailing underscore, presumably because
// `not` is an alternative operator token in C++.
2406 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
2407 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
2408 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
2409 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
2410 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
2411 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
2412 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
2413 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
2414 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
2415 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
2416 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
2417 DEFINE_TEST_NEON_2SAME(abs, Basic)
2418 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
2419 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
2420 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
2421 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
2422 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
2423 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
2424 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
2425 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
2426 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
2427 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.
2428 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
2429 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
2430 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
2431 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
2432 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
2433 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
2434 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
2435 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.
2436 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
2437 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
2438 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
2439 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
2440 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
2441 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
2442 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
2443 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
2444 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
2445 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
2446 DEFINE_TEST_NEON_2SAME(neg, Basic)
2447 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
2448 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
2449 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
2450 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
2451 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
2452 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
2453 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
2454 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
2455 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
2456 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.
2457 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
2458 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
2459 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
2460 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
2461 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
2462 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
2463 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
2464 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.
2465 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
2466 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
2467 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
2468
2469 // Advanced SIMD scalar two-register miscellaneous. One entry per instruction; the last macro argument is Basic or Conversions.
2470 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
2471 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
2472 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
2473 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
2474 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
2475 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
2476 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
2477 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
2478 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
2479 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
2480 // SCVTF (vector, integer) is covered by the SCVTF (vector, fixed point) tests with fbits = 0.
2481 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
2482 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
2483 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
2484 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
2485 // FCVTZS (vector, integer) is covered by the FCVTZS (vector, fixed point) tests with fbits = 0.
2486 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
2487 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
2488 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
2489 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
2490 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
2491 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
2492 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
2493 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
2494 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
2495 SIMTEST(fcvtxn_SCALAR) {  // Scalar fcvtxn test, written out by hand instead of through a DEFINE_TEST_NEON_* macro.
2496 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);  // Single S/D combination on the double-conversion inputs. NOTE(review): presumably S is the destination and D the source format -- confirm against the helper macro.
2497 }
2498 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
2499 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
2500 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
2501 // UCVTF (vector, integer) is covered by the UCVTF (vector, fixed point) tests with fbits = 0.
2502 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
2503 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
2504 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
2505 // FCVTZU (vector, integer) is covered by the FCVTZU (vector, fixed point) tests with fbits = 0.
2506 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
2507
2508 // Advanced SIMD across lanes. One entry per instruction, all with the Basic argument; the _LONG and _FP macro variants are chosen per instruction.
2509 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
2510 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
2511 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
2512 DEFINE_TEST_NEON_ACROSS(addv, Basic)
2513 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
2514 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
2515 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
2516 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
2517 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
2518 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
2519 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
2520
2521 // Advanced SIMD permute: the uzp, trn and zip instructions in their 1 and 2 forms, all through DEFINE_TEST_NEON_3SAME with the Basic argument.
2522 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
2523 DEFINE_TEST_NEON_3SAME(trn1, Basic)
2524 DEFINE_TEST_NEON_3SAME(zip1, Basic)
2525 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
2526 DEFINE_TEST_NEON_3SAME(trn2, Basic)
2527 DEFINE_TEST_NEON_3SAME(zip2, Basic)
2528
2529 // Advanced SIMD vector x indexed element. All three trailing arguments are Basic here. NOTE(review): presumably one input list per operand -- confirm against the macro definitions.
2530 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
2531 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
2532 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
2533 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
2534 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
2535 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
2536 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
2537 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
2538 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
2539 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
2540 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
2541 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
2542 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
2543 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
2544 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
2545 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
2546 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
2547 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
2548
2549 // Advanced SIMD scalar x indexed element. All three trailing arguments are Basic here. NOTE(review): presumably one input list per operand -- confirm against the macro definitions.
2550 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
2551 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
2552 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
2553 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
2554 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
2555 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
2556 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
2557 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
2558 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698