Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(672)

Side by Side Diff: test/cctest/test-simulator-arm64.cc

Issue 2622643005: ARM64: Add NEON support (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

#include "src/arm64/decoder-arm64-inl.h"
#include "src/arm64/disasm-arm64.h"
#include "src/arm64/simulator-arm64.h"
#include "src/arm64/utils-arm64.h"
#include "src/base/platform/platform.h"
#include "src/base/utils/random-number-generator.h"
#include "src/macro-assembler.h"
#include "test/cctest/cctest.h"
#include "test/cctest/test-simulator-inputs-arm64.h"
#include "test/cctest/test-simulator-traces-arm64.h"
#include "test/cctest/test-utils-arm64.h"
22
23 using namespace v8::internal;
24
25 // Test infrastructure.
26 //
27 // Tests are functions which accept no parameters and have no return values.
28 // The testing code should not perform an explicit return once completed. For
29 // example to test the mov immediate instruction a very simple test would be:
30 //
31 // SIMTEST(mov_x0_one) {
32 // SETUP();
33 //
34 // START();
35 // __ mov(x0, Operand(1));
36 // END();
37 //
38 // RUN();
39 //
40 // CHECK_EQUAL_64(1, x0);
41 //
42 // TEARDOWN();
43 // }
44 //
45 // Within a START ... END block all registers but sp can be modified. sp has to
46 // be explicitly saved/restored. The END() macro replaces the function return
47 // so it may appear multiple times in a test if the test has multiple exit
48 // points.
49 //
50 // Once the test has been run all integer and floating point registers as well
51 // as flags are accessible through a RegisterDump instance, see
52 // test-utils-arm64.h for more info on RegisterDump.
bbudge 2017/01/31 01:41:32 It seems to be in test-utils-arm64.h
martyn.capewell 2017/02/03 11:01:31 Done.
53 //
54 // We provide some helper asserts to handle common cases:
55 //
56 // CHECK_EQUAL_32(int32_t, int32_t)
bbudge 2017/01/31 01:41:32 nit int32_t
martyn.capewell 2017/02/03 11:01:31 Done.
57 // CHECK_EQUAL_FP32(float, float)
58 // CHECK_EQUAL_32(int32_t, W register)
59 // CHECK_EQUAL_FP32(float, S register)
60 // CHECK_EQUAL_64(int64_t, int64_t)
bbudge 2017/01/31 01:41:32 int64_t
martyn.capewell 2017/02/03 11:01:31 Done.
61 // CHECK_EQUAL_FP64(double, double)
62 // CHECK_EQUAL_64(int64_t, X register)
63 // CHECK_EQUAL_64(X register, X register)
64 // CHECK_EQUAL_FP64(double, D register)
65 //
66 // e.g. CHECK_EQUAL_FP64(0.5, d30);
67 //
68 // If more advanced computation is required before the assert then access the
69 // RegisterDump named core directly:
70 //
71 // CHECK_EQUAL_64(0x1234, core.xreg(0) & 0xffff);
72
#if 0  // TODO(all): enable.
static v8::Persistent<v8::Context> env;

static void InitializeVM() {
  if (env.IsEmpty()) {
    env = v8::Context::New();
  }
}
#endif

// '__ Foo(...)' emits through the MacroAssembler named 'masm' that
// SETUP_SIZE() declares in the enclosing function.
#define __ masm.
#define SIMTEST(name) TEST(SIM_##name)

#define BUF_SIZE 8192
#define SETUP() SETUP_SIZE(BUF_SIZE)

#define INIT_V8() CcTest::InitializeVM();

#ifdef USE_SIMULATOR

// Run tests with the simulator.
//
// SETUP_SIZE declares the locals that the other macros refer to by name:
// 'isolate', 'scope', 'buf', 'masm', 'decoder', 'simulator' and 'core'. The
// isolate pointer is checked before the HandleScope is opened on it.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != NULL);                                      \
  HandleScope scope(isolate);                                  \
  byte* buf = new byte[buf_size];                              \
  MacroAssembler masm(isolate, buf, buf_size,                  \
                      v8::internal::CodeObjectRequired::kYes); \
  Decoder<DispatchingDecoderVisitor>* decoder =                \
      new Decoder<DispatchingDecoderVisitor>();                \
  Simulator simulator(decoder);                                \
  RegisterDump core;

// Reset the assembler and simulator, so that instructions can be generated,
// but don't actually emit any code. This can be used by tests that need to
// emit instructions at the start of the buffer. Note that START_AFTER_RESET
// must be called before any callee-saved register is modified, and before an
// END is encountered.
//
// Most tests should call START, rather than call RESET directly.
#define RESET() \
  __ Reset();   \
  simulator.ResetState();

#define START_AFTER_RESET()      \
  __ SetStackPointer(csp);       \
  __ PushCalleeSavedRegisters(); \
  __ Debug("Start test.", __LINE__, TRACE_ENABLE | LOG_ALL);

#define START() \
  RESET();      \
  START_AFTER_RESET();

#define RUN() simulator.RunFrom(reinterpret_cast<Instruction*>(buf))

// END() dumps the register state into 'core' before the callee-saved
// registers are restored and the generated function returns.
#define END()                                               \
  __ Debug("End test.", __LINE__, TRACE_DISABLE | LOG_ALL); \
  core.Dump(&masm);                                         \
  __ PopCalleeSavedRegisters();                             \
  __ Ret();                                                 \
  __ GetCode(NULL);

#define TEARDOWN() delete[] buf;

#else  // ifdef USE_SIMULATOR.
// Run the test on real hardware or models.
//
// As above, but the code buffer comes from OS::Allocate (third argument true)
// and 'actual_size' records the size that TEARDOWN() later frees.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != NULL);                                      \
  HandleScope scope(isolate);                                  \
  size_t actual_size;                                          \
  byte* buf = static_cast<byte*>(                              \
      v8::base::OS::Allocate(buf_size, &actual_size, true));   \
  MacroAssembler masm(isolate, buf, actual_size,               \
                      v8::internal::CodeObjectRequired::kYes); \
  RegisterDump core;

#define RESET()                                                \
  __ Reset();                                                  \
  /* Reset the machine state (like simulator.ResetState()). */ \
  __ Msr(NZCV, xzr);                                           \
  __ Msr(FPCR, xzr);

#define START_AFTER_RESET() \
  __ SetStackPointer(csp);  \
  __ PushCalleeSavedRegisters();

#define START() \
  RESET();      \
  START_AFTER_RESET();

// The buffer address is copied into a function pointer with memcpy rather
// than cast, then called directly.
#define RUN()                                                       \
  Assembler::FlushICache(isolate, buf, masm.SizeOfGeneratedCode()); \
  {                                                                 \
    void (*test_function)(void);                                    \
    memcpy(&test_function, &buf, sizeof(buf));                      \
    test_function();                                                \
  }

#define END()                   \
  core.Dump(&masm);             \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  __ GetCode(NULL);

#define TEARDOWN() v8::base::OS::Free(buf, actual_size);

#endif  // ifdef USE_SIMULATOR.
181
// Assertion helpers. Each compares an expected value against the register
// state captured in the RegisterDump named 'core' (declared by SETUP_SIZE()).
#define CHECK_EQUAL_NZCV(expected) CHECK(EqualNzcv(expected, core.flags_nzcv()))

// The argument is parenthesized so that the address-of applies to the whole
// expression passed in, not just its first operand.
#define CHECK_EQUAL_REGISTERS(expected) \
  CHECK(EqualRegisters(&(expected), &core))

#define CHECK_EQUAL_32(expected, result) \
  CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))

#define CHECK_EQUAL_FP32(expected, result) \
  CHECK(EqualFP32(expected, &core, result))

#define CHECK_EQUAL_64(expected, result) CHECK(Equal64(expected, &core, result))

#define CHECK_EQUAL_FP64(expected, result) \
  CHECK(EqualFP64(expected, &core, result))

// The literal pool size is only checked in DEBUG builds; otherwise the macro
// expands to a no-op expression.
#ifdef DEBUG
#define CHECK_LITERAL_POOL_SIZE(expected) \
  CHECK((expected) == (__ LiteralPoolSize()))
#else
#define CHECK_LITERAL_POOL_SIZE(expected) ((void)0)
#endif
203
204 // The maximum number of errors to report in detail for each test.
205 static const unsigned kErrorReportLimit = 8;
206
207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,
208 const VRegister& vn);
209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,
210 const VRegister& vn,
211 const VRegister& vm);
212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,
213 const VRegister& vn,
214 const VRegister& vm,
215 int vm_index);
216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(
217 const VRegister& vd, int imm1, const VRegister& vn, int imm2);
218
219 // This helps using the same typename for both the function pointer
220 // and the array of immediates passed to helper routines.
221 template <typename T>
222 class Test2OpImmediateNEONHelper_t {
223 public:
224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,
225 const VRegister& vn, T imm);
226 };
227
228 namespace {
229
// Number of hexadecimal digits needed to print a value of the wider of the
// two template types (two digits per byte).
template <typename Ta, typename Tb>
unsigned MaxHexCharCount() {
  const size_t widest_bytes = (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta);
  const unsigned widest_bits = static_cast<unsigned>(widest_bytes) * 8;
  return widest_bits / 4;
}
237
238 // ==== Tests for instructions of the form <INST> VReg, VReg. ====
239
240 void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n,
241 unsigned inputs_n_length, uintptr_t results,
242 VectorFormat vd_form, VectorFormat vn_form) {
243 DCHECK_NE(vd_form, kFormatUndefined);
244 DCHECK_NE(vn_form, kFormatUndefined);
245
246 SETUP();
247 START();
248
249 // Roll up the loop to keep the code size down.
250 Label loop_n;
251
252 Register out = x0;
253 Register inputs_n_base = x1;
254 Register inputs_n_last_16bytes = x3;
255 Register index_n = x5;
256
257 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
258 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
259
260 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
261 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
262 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
263 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
264 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
265
266 // These will be either a D- or a Q-register form, with a single lane
267 // (for use in scalar load and store operations).
268 VRegister vd = VRegister::Create(0, vd_bits);
269 VRegister vn = v1.V16B();
270 VRegister vntmp = v3.V16B();
271
272 // These will have the correct format for use when calling 'helper'.
273 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count);
274 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
275
276 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
277 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
278
279 __ Mov(out, results);
280
281 __ Mov(inputs_n_base, inputs_n);
282 __ Mov(inputs_n_last_16bytes,
283 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
284
285 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
286
287 __ Mov(index_n, 0);
288 __ Bind(&loop_n);
289
290 __ Ldr(vntmp_single,
291 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
292 __ Ext(vn, vn, vntmp, vn_lane_bytes);
293
294 // Set the destination to zero.
295
296 // TODO(all): Setting the destination to values other than zero might be a
297 // better test for instructions such as sqxtn2 which may leave parts of V
298 // registers unchanged.
299 __ Movi(vd.V16B(), 0);
300
301 (masm.*helper)(vd_helper, vn_helper);
302
303 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
304
305 __ Add(index_n, index_n, 1);
306 __ Cmp(index_n, inputs_n_length);
307 __ B(lo, &loop_n);
308
309 END();
310 RUN();
311 TEARDOWN();
312 }
313
314 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
315 // arrays of rawbit representation of input values. This ensures that
316 // exact bit comparisons can be performed.
317 template <typename Td, typename Tn>
318 void Test1OpNEON(const char* name, Test1OpNEONHelper_t helper,
319 const Tn inputs_n[], unsigned inputs_n_length,
320 const Td expected[], unsigned expected_length,
321 VectorFormat vd_form, VectorFormat vn_form) {
322 DCHECK_GT(inputs_n_length, 0U);
323
324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
325 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
326 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
327
328 const unsigned results_length = inputs_n_length;
329 Td* results = new Td[results_length * vd_lane_count];
bbudge 2017/01/31 01:41:31 Use std::vector rather than manage raw array point
martyn.capewell 2017/02/03 11:01:31 I could do this, but I'd need to push dummy values
bbudge 2017/02/08 01:39:11 If the types Td are default constructible, you can
martyn.capewell 2017/02/15 11:51:00 Done.
330 const unsigned lane_bit = sizeof(Td) * 8;
331 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
332
333 Test1OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n),
334 inputs_n_length, reinterpret_cast<uintptr_t>(results),
335 vd_form, vn_form);
336
337 if (CcTest::sim_test_trace()) {
338 // Print the results.
339 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
340 for (unsigned iteration = 0; iteration < results_length; iteration++) {
341 printf(" ");
342 // Output a separate result for each element of the result vector.
343 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
344 unsigned index = lane + (iteration * vd_lane_count);
345 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex,
346 static_cast<uint64_t>(results[index]));
347 }
348 printf("\n");
349 }
350
351 printf("};\n");
352 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name,
353 results_length);
354 } else {
355 // Check the results.
356 CHECK(expected_length == results_length);
357 unsigned error_count = 0;
358 unsigned d = 0;
359 const char* padding = " ";
360 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1));
361 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
362 bool error_in_vector = false;
363
364 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
365 unsigned output_index = (n * vd_lane_count) + lane;
366
367 if (results[output_index] != expected[output_index]) {
368 error_in_vector = true;
369 break;
370 }
371 }
372
373 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
374 printf("%s\n", name);
375 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding,
376 lane_len_in_hex + 1, padding);
377
378 const unsigned first_index_n =
379 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
380
381 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
382 lane++) {
383 unsigned output_index = (n * vd_lane_count) + lane;
384 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
385
386 printf(
387 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64
388 " "
389 "| 0x%0*" PRIx64 "\n",
390 results[output_index] != expected[output_index] ? '*' : ' ',
391 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
392 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
393 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));
394 }
395 }
396 }
397 DCHECK_EQ(d, expected_length);
398 if (error_count > kErrorReportLimit) {
399 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
400 }
401 DCHECK_EQ(error_count, 0U);
402 }
403 delete[] results;
404 }
405
406 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====
407 // where <V> is one of B, H, S or D registers.
408 // e.g. saddlv H1, v0.8B
409
410 // TODO(all): Change tests to store all lanes of the resulting V register.
411 // Some tests store all 128 bits of the resulting V register to
412 // check the simulator's behaviour on the rest of the register.
413 // This is better than storing the affected lanes only.
414 // Change any tests such as the 'Across' template to do the same.
415
416 void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n,
417 unsigned inputs_n_length, uintptr_t results,
418 VectorFormat vd_form, VectorFormat vn_form) {
419 DCHECK_NE(vd_form, kFormatUndefined);
420 DCHECK_NE(vn_form, kFormatUndefined);
421
422 SETUP();
423 START();
424
425 // Roll up the loop to keep the code size down.
426 Label loop_n;
427
428 Register out = x0;
429 Register inputs_n_base = x1;
430 Register inputs_n_last_vector = x3;
431 Register index_n = x5;
432
433 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
434 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
435 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
436 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
437 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
438 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
439
440 // Test destructive operations by (arbitrarily) using the same register for
441 // B and S lane sizes.
442 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize);
443
444 // These will be either a D- or a Q-register form, with a single lane
445 // (for use in scalar load and store operations).
446 // Create two aliases for v8; the first is the destination for the tested
447 // instruction, the second, the whole Q register to check the results.
448 VRegister vd = VRegister::Create(0, vd_bits);
449 VRegister vdstr = VRegister::Create(0, kQRegSizeInBits);
450
451 VRegister vn = VRegister::Create(1, vn_bits);
452 VRegister vntmp = VRegister::Create(3, vn_bits);
453
454 // These will have the correct format for use when calling 'helper'.
455 VRegister vd_helper = VRegister::Create(0, vn_bits, vn_lane_count);
456 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
457
458 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
459 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
460
461 // Same registers for use in the 'ext' instructions.
462 VRegister vn_ext = (kDRegSizeInBits == vn_bits) ? vn.V8B() : vn.V16B();
463 VRegister vntmp_ext =
464 (kDRegSizeInBits == vn_bits) ? vntmp.V8B() : vntmp.V16B();
465
466 __ Mov(out, results);
467
468 __ Mov(inputs_n_base, inputs_n);
469 __ Mov(inputs_n_last_vector,
470 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
471
472 __ Ldr(vn, MemOperand(inputs_n_last_vector));
473
474 __ Mov(index_n, 0);
475 __ Bind(&loop_n);
476
477 __ Ldr(vntmp_single,
478 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
479 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
480
481 if (destructive) {
482 __ Mov(vd_helper, vn_helper);
483 (masm.*helper)(vd, vd_helper);
484 } else {
485 (masm.*helper)(vd, vn_helper);
486 }
487
488 __ Str(vdstr, MemOperand(out, kQRegSize, PostIndex));
489
490 __ Add(index_n, index_n, 1);
491 __ Cmp(index_n, inputs_n_length);
492 __ B(lo, &loop_n);
493
494 END();
495 RUN();
496 TEARDOWN();
497 }
498
499 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
500 // arrays of rawbit representation of input values. This ensures that
501 // exact bit comparisons can be performed.
502 template <typename Td, typename Tn>
503 void Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper,
504 const Tn inputs_n[], unsigned inputs_n_length,
505 const Td expected[], unsigned expected_length,
506 VectorFormat vd_form, VectorFormat vn_form) {
507 DCHECK_GT(inputs_n_length, 0U);
508
509 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
510 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);
511
512 const unsigned results_length = inputs_n_length;
513 Td* results = new Td[results_length * vd_lanes_per_q];
514 const unsigned lane_bit = sizeof(Td) * 8;
515 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
516
517 Test1OpAcrossNEON_Helper(
518 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
519 reinterpret_cast<uintptr_t>(results), vd_form, vn_form);
520
521 if (CcTest::sim_test_trace()) {
522 // Print the results.
523 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
524 for (unsigned iteration = 0; iteration < results_length; iteration++) {
525 printf(" ");
526 // Output a separate result for each element of the result vector.
527 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
528 unsigned index = lane + (iteration * vd_lane_count);
529 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex,
530 static_cast<uint64_t>(results[index]));
531 }
532 printf("\n");
533 }
534
535 printf("};\n");
536 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name,
537 results_length);
538 } else {
539 // Check the results.
540 DCHECK_EQ(expected_length, results_length);
541 unsigned error_count = 0;
542 unsigned d = 0;
543 const char* padding = " ";
544 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1));
545 for (unsigned n = 0; n < inputs_n_length; n++, d++) {
546 bool error_in_vector = false;
547
548 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
549 unsigned expected_index = (n * vd_lane_count) + lane;
550 unsigned results_index = (n * vd_lanes_per_q) + lane;
551
552 if (results[results_index] != expected[expected_index]) {
553 error_in_vector = true;
554 break;
555 }
556
557 // For across operations, the remaining lanes should be zero.
558 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {
559 unsigned results_index = (n * vd_lanes_per_q) + lane;
560 if (results[results_index] != 0) {
561 error_in_vector = true;
562 break;
563 }
564 }
565 }
566
567 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
568 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
569
570 printf("%s\n", name);
571 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding,
572 lane_len_in_hex + 1, padding);
573
574 // TODO(all): In case of an error, all tests print out as many elements
575 // as there are lanes in the output or input vectors. This way the
576 // viewer can read all the values that were needed for the operation
577 // but the output contains also unnecessary values. These prints can be
578 // improved according to the arguments passed to test functions.
579 // This output for the 'Across' category has the required modifications.
bbudge 2017/01/31 01:41:31 It's not clear what is "to be done" here.
martyn.capewell 2017/02/03 11:01:31 I think it's saying that, on error, it prints out
bbudge 2017/02/08 01:39:11 OK
580 for (unsigned lane = 0; lane < vn_lane_count; lane++) {
581 unsigned results_index =
582 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);
583 unsigned input_index_n =
584 (inputs_n_length - vn_lane_count + n + 1 + lane) %
585 inputs_n_length;
586
587 Td expect = 0;
588 if ((vn_lane_count - 1) == lane) {
589 // This is the last lane to be printed, ie. the least-significant
590 // lane, so use the expected value; any other lane should be zero.
591 unsigned expected_index = n * vd_lane_count;
592 expect = expected[expected_index];
593 }
594 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
595 results[results_index] != expect ? '*' : ' ', lane_len_in_hex,
596 static_cast<uint64_t>(inputs_n[input_index_n]),
597 lane_len_in_hex, static_cast<uint64_t>(results[results_index]),
598 lane_len_in_hex, static_cast<uint64_t>(expect));
599 }
600 }
601 }
602 DCHECK_EQ(d, expected_length);
603 if (error_count > kErrorReportLimit) {
604 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
605 }
606 DCHECK_EQ(error_count, 0U);
607 }
608 delete[] results;
609 }
610
611 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====
612
613 void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d,
614 uintptr_t inputs_n, unsigned inputs_n_length,
615 uintptr_t inputs_m, unsigned inputs_m_length,
616 uintptr_t results, VectorFormat vd_form,
617 VectorFormat vn_form, VectorFormat vm_form) {
618 DCHECK_NE(vd_form, kFormatUndefined);
619 DCHECK_NE(vn_form, kFormatUndefined);
620 DCHECK_NE(vm_form, kFormatUndefined);
621
622 SETUP();
623 START();
624
625 // Roll up the loop to keep the code size down.
626 Label loop_n, loop_m;
627
628 Register out = x0;
629 Register inputs_n_base = x1;
630 Register inputs_m_base = x2;
631 Register inputs_d_base = x3;
632 Register inputs_n_last_16bytes = x4;
633 Register inputs_m_last_16bytes = x5;
634 Register index_n = x6;
635 Register index_m = x7;
636
637 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
638 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
639
640 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
641 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
642 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
643 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
644 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
645
646 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
647 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
648 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
649 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
650 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
651
652 // Always load and store 128 bits regardless of the format.
653 VRegister vd = v0.V16B();
654 VRegister vn = v1.V16B();
655 VRegister vm = v2.V16B();
656 VRegister vntmp = v3.V16B();
657 VRegister vmtmp = v4.V16B();
658 VRegister vres = v5.V16B();
659
660 // These will have the correct format for calling the 'helper'.
661 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
662 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count);
663 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);
664
665 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
666 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
667 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits);
668
669 __ Mov(out, results);
670
671 __ Mov(inputs_d_base, inputs_d);
672
673 __ Mov(inputs_n_base, inputs_n);
674 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
675 __ Mov(inputs_m_base, inputs_m);
676 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
677
678 __ Ldr(vd, MemOperand(inputs_d_base));
679 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
680 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
681
682 __ Mov(index_n, 0);
683 __ Bind(&loop_n);
684
685 __ Ldr(vntmp_single,
686 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
687 __ Ext(vn, vn, vntmp, vn_lane_bytes);
688
689 __ Mov(index_m, 0);
690 __ Bind(&loop_m);
691
692 __ Ldr(vmtmp_single,
693 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
694 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
695
696 __ Mov(vres, vd);
697
698 (masm.*helper)(vres_helper, vn_helper, vm_helper);
699
700 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
701
702 __ Add(index_m, index_m, 1);
703 __ Cmp(index_m, inputs_m_length);
704 __ B(lo, &loop_m);
705
706 __ Add(index_n, index_n, 1);
707 __ Cmp(index_n, inputs_n_length);
708 __ B(lo, &loop_n);
709
710 END();
711 RUN();
712 TEARDOWN();
713 }
714
715 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
716 // arrays of rawbit representation of input values. This ensures that
717 // exact bit comparisons can be performed.
718 template <typename Td, typename Tn, typename Tm>
719 void Test2OpNEON(const char* name, Test2OpNEONHelper_t helper,
720 const Td inputs_d[], const Tn inputs_n[],
721 unsigned inputs_n_length, const Tm inputs_m[],
722 unsigned inputs_m_length, const Td expected[],
723 unsigned expected_length, VectorFormat vd_form,
724 VectorFormat vn_form, VectorFormat vm_form) {
725 DCHECK(inputs_n_length > 0 && inputs_m_length > 0);
726
727 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
728
729 const unsigned results_length = inputs_n_length * inputs_m_length;
730 Td* results = new Td[results_length * vd_lane_count];
731 const unsigned lane_bit = sizeof(Td) * 8;
732 const unsigned lane_len_in_hex =
733 static_cast<unsigned>(std::max(sizeof(Td), sizeof(Tm)) * 8) / 4;
734
735 Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d),
736 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
737 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,
738 reinterpret_cast<uintptr_t>(results), vd_form, vn_form,
739 vm_form);
740
741 if (CcTest::sim_test_trace()) {
742 // Print the results.
743 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
744 for (unsigned iteration = 0; iteration < results_length; iteration++) {
745 printf(" ");
746 // Output a separate result for each element of the result vector.
747 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
748 unsigned index = lane + (iteration * vd_lane_count);
749 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex,
750 static_cast<uint64_t>(results[index]));
751 }
752 printf("\n");
753 }
754
755 printf("};\n");
756 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name,
757 results_length);
758 } else {
759 // Check the results.
760 CHECK(expected_length == results_length);
761 unsigned error_count = 0;
762 unsigned d = 0;
763 const char* padding = " ";
764 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);
765 for (unsigned n = 0; n < inputs_n_length; n++) {
766 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
767 bool error_in_vector = false;
768
769 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
770 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
771 (m * vd_lane_count) + lane;
772
773 if (results[output_index] != expected[output_index]) {
774 error_in_vector = true;
775 break;
776 }
777 }
778
779 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
780 printf("%s\n", name);
781 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n",
782 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding,
783 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding);
784
785 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
786 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
787 (m * vd_lane_count) + lane;
788 unsigned input_index_n =
789 (inputs_n_length - vd_lane_count + n + 1 + lane) %
790 inputs_n_length;
791 unsigned input_index_m =
792 (inputs_m_length - vd_lane_count + m + 1 + lane) %
793 inputs_m_length;
794
795 printf(
796 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
797 " "
798 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
799 results[output_index] != expected[output_index] ? '*' : ' ',
800 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),
801 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
802 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),
803 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
804 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));
805 }
806 }
807 }
808 }
809 DCHECK_EQ(d, expected_length);
810 if (error_count > kErrorReportLimit) {
811 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
812 }
813 DCHECK_EQ(error_count, 0U);
814 }
815 delete[] results;
816 }
817
818 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====
819
// Generates and runs a test program for by-element NEON instructions.
//
// The emitted code iterates over every element of inputs_n (outer loop) and
// every element of inputs_m (inner loop). For each pair it applies 'helper'
// once per entry of 'indices' and stores each result vector to 'results'.
//
//   helper            Pointer to the MacroAssembler member to test; invoked as
//                     (masm.*helper)(vd, vn, vm, index).
//   inputs_d          Address of the data used to seed the destination
//                     register before every helper call.
//   inputs_n/m        Addresses of the input element arrays.
//   inputs_n/m_length Loop bounds for the n/m loops (compared against the
//                     element index registers).
//   indices           Lane indices passed to 'helper'; one instruction is
//                     emitted per entry.
//   results           Address of the output buffer; advanced by
//                     vd.SizeInBytes() after every store.
//   v*_form           Vector formats used to build the helper operands; none
//                     may be kFormatUndefined.
820 void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,
821 uintptr_t inputs_d, uintptr_t inputs_n,
822 unsigned inputs_n_length, uintptr_t inputs_m,
823 unsigned inputs_m_length, const int indices[],
824 unsigned indices_length, uintptr_t results,
825 VectorFormat vd_form, VectorFormat vn_form,
826 VectorFormat vm_form) {
827 DCHECK_NE(vd_form, kFormatUndefined);
828 DCHECK_NE(vn_form, kFormatUndefined);
829 DCHECK_NE(vm_form, kFormatUndefined);
830
831 SETUP();
832 START();
833
834 // Roll up the loop to keep the code size down.
835 Label loop_n, loop_m;
836
// General-purpose registers used by the generated code.
837 Register out = x0;
838 Register inputs_n_base = x1;
839 Register inputs_m_base = x2;
840 Register inputs_d_base = x3;
841 Register inputs_n_last_16bytes = x4;
842 Register inputs_m_last_16bytes = x5;
843 Register index_n = x6;
844 Register index_m = x7;
845
846 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
847 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
848
849 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
850 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
851 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
852 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
853 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
854
855 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);
856 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);
857 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);
858 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);
859 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);
860
861 // Always load and store 128 bits regardless of the format.
862 VRegister vd = v0.V16B();
863 VRegister vn = v1.V16B();
864 VRegister vm = v2.V16B();
865 VRegister vntmp = v3.V16B();
866 VRegister vmtmp = v4.V16B();
867 VRegister vres = v5.V16B();
868
869 // These will have the correct format for calling the 'helper'.
// They alias v1, v2 and v5, i.e. the same registers as vn, vm and vres.
870 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
871 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count);
872 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);
873
874 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
// They alias v3 and v4 (vntmp and vmtmp) and are used for single-element loads.
875 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
876 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits);
877
878 __ Mov(out, results);
879
880 __ Mov(inputs_d_base, inputs_d);
881
// NOTE(review): the two "last_16bytes" addresses below are derived from the
// element counts (inputs_*_length - 16), whereas Test2OpImmNEON_Helper scales
// the count by the lane size in bytes first. The subtraction is also done in
// 'unsigned', so a length below 16 would wrap before being added to the base
// address. Presumably the recorded expected values depend on the current
// seed contents of vn/vm - confirm before changing.
882 __ Mov(inputs_n_base, inputs_n);
883 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));
884 __ Mov(inputs_m_base, inputs_m);
885 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));
886
// Seed the destination and both input vectors once, before the loops.
887 __ Ldr(vd, MemOperand(inputs_d_base));
888 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
889 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));
890
891 __ Mov(index_n, 0);
892 __ Bind(&loop_n);
893
// Load element 'index_n' of inputs_n and shift it into vn: Ext by one lane
// drops the lowest lane of vn and appends the new element at the top, so vn
// behaves as a sliding window over the input array.
894 __ Ldr(vntmp_single,
895 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
896 __ Ext(vn, vn, vntmp, vn_lane_bytes);
897
898 __ Mov(index_m, 0);
899 __ Bind(&loop_m);
900
// Same sliding-window update for vm, using element 'index_m' of inputs_m.
901 __ Ldr(vmtmp_single,
902 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));
903 __ Ext(vm, vm, vmtmp, vm_lane_bytes);
904
// vres is initialised from vd once per (n, m) pair; it is not reset between
// the per-index helper calls emitted below.
905 __ Mov(vres, vd);
906 {
// This C++ loop is unrolled at code-generation time: one helper instruction
// and one store are emitted for every entry of 'indices'.
907 for (unsigned i = 0; i < indices_length; i++) {
908 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);
909 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
910 }
911 }
912
// Inner loop: repeat while index_m is (unsigned) lower than inputs_m_length.
913 __ Add(index_m, index_m, 1);
914 __ Cmp(index_m, inputs_m_length);
915 __ B(lo, &loop_m);
916
// Outer loop: repeat while index_n is (unsigned) lower than inputs_n_length.
917 __ Add(index_n, index_n, 1);
918 __ Cmp(index_n, inputs_n_length);
919 __ B(lo, &loop_n);
920
921 END();
922 RUN();
923 TEARDOWN();
924 }
925
926 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
927 // arrays of rawbit representation of input values. This ensures that
928 // exact bit comparisons can be performed.
// Runs a by-element NEON test and either prints or verifies its results.
//
// A results buffer of (inputs_n_length * inputs_m_length * indices_length)
// vectors, each of MaxLaneCountFromFormat(vd_form) elements of type Td, is
// allocated and filled by TestByElementNEON_Helper.
//
// If CcTest::sim_test_trace() is set, the results are printed as a C array
// named kExpected_NEON_<name> followed by kExpectedCount_NEON_<name>.
// Otherwise the results are compared lane by lane with 'expected'; up to
// kErrorReportLimit mismatching vectors are printed and the function CHECKs
// that no mismatch occurred.
//
// 'expected_length' is a count of result vectors (it is CHECKed against
// results_length), not a count of individual lanes.
929 template <typename Td, typename Tn, typename Tm>
930 void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper,
931 const Td inputs_d[], const Tn inputs_n[],
932 unsigned inputs_n_length, const Tm inputs_m[],
933 unsigned inputs_m_length, const int indices[],
934 unsigned indices_length, const Td expected[],
935 unsigned expected_length, VectorFormat vd_form,
936 VectorFormat vn_form, VectorFormat vm_form) {
937 DCHECK_GT(inputs_n_length, 0U);
938 DCHECK_GT(inputs_m_length, 0U);
939 DCHECK_GT(indices_length, 0U);
940
941 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);
942
943 const unsigned results_length =
944 inputs_n_length * inputs_m_length * indices_length;
945 Td* results = new Td[results_length * vd_lane_count];
946 const unsigned lane_bit = sizeof(Td) * 8;
// NOTE(review): the sibling functions Test2OpImmNEON and TestOpImmOpImmNEON
// use MaxHexCharCount<Td, Tn>() here; this one uses <Td, Tm>. Confirm the
// difference is intentional.
947 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();
948
949 TestByElementNEON_Helper(
950 helper, reinterpret_cast<uintptr_t>(inputs_d),
951 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,
952 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, indices,
953 indices_length, reinterpret_cast<uintptr_t>(results), vd_form, vn_form,
954 vm_form);
955
956 if (CcTest::sim_test_trace()) {
957 // Print the results.
958 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
959 for (unsigned iteration = 0; iteration < results_length; iteration++) {
960 printf(" ");
961 // Output a separate result for each element of the result vector.
962 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
963 unsigned index = lane + (iteration * vd_lane_count);
964 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex,
965 static_cast<uint64_t>(results[index]));
966 }
967 printf("\n");
968 }
969
970 printf("};\n");
971 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name,
972 results_length);
973 } else {
974 // Check the results.
975 CHECK(expected_length == results_length);
976 unsigned error_count = 0;
// 'd' counts the visited (n, m, index) combinations; it is checked against
// expected_length after the loops.
977 unsigned d = 0;
// 'padding' supplies the spaces for the "%.*s" column alignment in the report
// header; it must hold at least lane_len_in_hex + 1 characters.
978 const char* padding = " ";
979 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);
980 for (unsigned n = 0; n < inputs_n_length; n++) {
981 for (unsigned m = 0; m < inputs_m_length; m++) {
982 for (unsigned index = 0; index < indices_length; index++, d++) {
983 bool error_in_vector = false;
984
// First pass: detect whether any lane of this result vector differs.
985 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
986 unsigned output_index =
987 (n * inputs_m_length * indices_length * vd_lane_count) +
988 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +
989 lane;
990
991 if (results[output_index] != expected[output_index]) {
992 error_in_vector = true;
993 break;
994 }
995 }
996
// Second pass: print the whole vector, but only for the first
// kErrorReportLimit failing vectors. error_count keeps counting beyond that.
997 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
998 printf("%s\n", name);
999 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n",
1000 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding,
1001 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding);
1002
1003 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1004 unsigned output_index =
1005 (n * inputs_m_length * indices_length * vd_lane_count) +
1006 (m * indices_length * vd_lane_count) +
1007 (index * vd_lane_count) + lane;
// Reconstruct which input element is reported for this lane, wrapping around
// the input arrays.
// NOTE(review): both computations use vd_lane_count; presumably this assumes
// the Vn/Vm windows have the same lane count as Vd - confirm for forms where
// they differ.
1008 unsigned input_index_n =
1009 (inputs_n_length - vd_lane_count + n + 1 + lane) %
1010 inputs_n_length;
1011 unsigned input_index_m =
1012 (inputs_m_length - vd_lane_count + m + 1 + lane) %
1013 inputs_m_length;
1014
// A leading '*' marks the lanes whose result differs from the expectation.
1015 printf(
1016 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
1017 " "
1018 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1019 results[output_index] != expected[output_index] ? '*' : ' ',
1020 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),
1021 lane_len_in_hex,
1022 static_cast<uint64_t>(inputs_n[input_index_n]),
1023 lane_len_in_hex,
1024 static_cast<uint64_t>(inputs_m[input_index_m]),
1025 indices[index], lane_len_in_hex,
1026 static_cast<uint64_t>(results[output_index]), lane_len_in_hex,
1027 static_cast<uint64_t>(expected[output_index]));
1028 }
1029 }
1030 }
1031 }
1032 }
1033 DCHECK_EQ(d, expected_length);
1034 if (error_count > kErrorReportLimit) {
1035 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1036 }
1037 CHECK(error_count == 0);
1038 }
1039 delete[] results;
1040 }
1041
1042 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====
1043
// Generates and runs a test program for NEON instructions that take a vector
// source and an immediate operand.
//
// The emitted code loops over every element of inputs_n. For each element it
// clears the destination register and then applies 'helper' once per entry of
// inputs_m (the immediates), storing the destination to 'results' after each
// call.
//
//   helper           Pointer to the MacroAssembler member to test; invoked as
//                    (masm.*helper)(vd, vn, immediate).
//   inputs_n         Address of the input element array.
//   inputs_n_length  Number of input elements (loop bound).
//   inputs_m         Immediate values, read at code-generation time.
//   inputs_m_length  Number of immediates.
//   results          Address of the output buffer; advanced by
//                    vd.SizeInBytes() after every store.
//   vd_form/vn_form  Vector formats of the operands; neither may be
//                    kFormatUndefined.
1044 template <typename Tm>
1045 void Test2OpImmNEON_Helper(
1046 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
1047 uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[],
1048 unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form,
1049 VectorFormat vn_form) {
1050 DCHECK(vd_form != kFormatUndefined && vn_form != kFormatUndefined);
1051
1052 SETUP();
1053 START();
1054
1055 // Roll up the loop to keep the code size down.
1056 Label loop_n;
1057
// General-purpose registers used by the generated code.
1058 Register out = x0;
1059 Register inputs_n_base = x1;
1060 Register inputs_n_last_16bytes = x3;
1061 Register index_n = x5;
1062
1063 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1064 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1065
1066 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1067 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1068 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1069 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1070 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1071
1072 // These will be either a D- or a Q-register form, with a single lane
1073 // (for use in scalar load and store operations).
1074 VRegister vd = VRegister::Create(0, vd_bits);
1075 VRegister vn = v1.V16B();
1076 VRegister vntmp = v3.V16B();
1077
1078 // These will have the correct format for use when calling 'helper'.
// They alias v0 and v1, i.e. the same registers as vd and vn.
1079 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count);
1080 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
1081
1082 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1083 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);
1084
1085 __ Mov(out, results);
1086
// Point at the final 16 bytes of the input array (element count scaled by the
// lane size in bytes) and use them as the initial contents of vn.
1087 __ Mov(inputs_n_base, inputs_n);
1088 __ Mov(inputs_n_last_16bytes,
1089 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);
1090
1091 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));
1092
1093 __ Mov(index_n, 0);
1094 __ Bind(&loop_n);
1095
// Load element 'index_n' and shift it into vn: Ext by one lane drops the
// lowest lane of vn and appends the new element at the top.
1096 __ Ldr(vntmp_single,
1097 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1098 __ Ext(vn, vn, vntmp, vn_lane_bytes);
1099
1100 // Set the destination to zero for tests such as '[r]shrn2'.
1101 // TODO(all): Setting the destination to values other than zero might be a
1102 // better test for shift and accumulate instructions (srsra/ssra/usra/ursra).
// Note that the destination is cleared once per input element, i.e. before
// the unrolled sequence of immediates below, not before each immediate.
1103 __ Movi(vd.V16B(), 0);
1104
1105 {
// This C++ loop is unrolled at code-generation time: one helper instruction
// and one store are emitted for every immediate.
1106 for (unsigned i = 0; i < inputs_m_length; i++) {
1107 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);
1108 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));
1109 }
1110 }
1111
// Repeat while index_n is (unsigned) lower than inputs_n_length.
1112 __ Add(index_n, index_n, 1);
1113 __ Cmp(index_n, inputs_n_length);
1114 __ B(lo, &loop_n);
1115
1116 END();
1117 RUN();
1118 TEARDOWN();
1119 }
1120
1121 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1122 // arrays of rawbit representation of input values. This ensures that
1123 // exact bit comparisons can be performed.
// Runs a vector/immediate NEON test and either prints or verifies its
// results.
//
// A results buffer of (inputs_n_length * inputs_m_length) vectors, each of
// LaneCountFromFormat(vd_form) elements of type Td, is allocated and filled
// by Test2OpImmNEON_Helper.
//
// If CcTest::sim_test_trace() is set, the results are printed as a C array
// named kExpected_NEON_<name> followed by kExpectedCount_NEON_<name>.
// Otherwise the results are compared lane by lane with 'expected'; up to
// kErrorReportLimit mismatching vectors are printed and the function CHECKs
// that no mismatch occurred.
//
// 'expected_length' is a count of result vectors (it is CHECKed against
// results_length), not a count of individual lanes.
1124 template <typename Td, typename Tn, typename Tm>
1125 void Test2OpImmNEON(const char* name,
1126 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,
1127 const Tn inputs_n[], unsigned inputs_n_length,
1128 const Tm inputs_m[], unsigned inputs_m_length,
1129 const Td expected[], unsigned expected_length,
1130 VectorFormat vd_form, VectorFormat vn_form) {
1131 DCHECK(inputs_n_length > 0 && inputs_m_length > 0);
1132
1133 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1134 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1135 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1136
1137 const unsigned results_length = inputs_n_length * inputs_m_length;
1138 Td* results = new Td[results_length * vd_lane_count];
1139 const unsigned lane_bit = sizeof(Td) * 8;
1140 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1141
1142 Test2OpImmNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n),
1143 inputs_n_length, inputs_m, inputs_m_length,
1144 reinterpret_cast<uintptr_t>(results), vd_form, vn_form);
1145
1146 if (CcTest::sim_test_trace()) {
1147 // Print the results.
1148 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1149 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1150 printf(" ");
1151 // Output a separate result for each element of the result vector.
1152 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1153 unsigned index = lane + (iteration * vd_lane_count);
1154 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex,
1155 static_cast<uint64_t>(results[index]));
1156 }
1157 printf("\n");
1158 }
1159
1160 printf("};\n");
1161 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name,
1162 results_length);
1163 } else {
1164 // Check the results.
1165 CHECK(expected_length == results_length);
1166 unsigned error_count = 0;
// 'd' counts the visited (n, m) combinations; it is checked against
// expected_length after the loops.
1167 unsigned d = 0;
// 'padding' supplies the spaces for the "%.*s" column alignment in the report
// header; it must hold at least lane_len_in_hex + 1 characters.
1168 const char* padding = " ";
1169 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);
1170 for (unsigned n = 0; n < inputs_n_length; n++) {
1171 for (unsigned m = 0; m < inputs_m_length; m++, d++) {
1172 bool error_in_vector = false;
1173
// First pass: detect whether any lane of this result vector differs.
1174 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1175 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1176 (m * vd_lane_count) + lane;
1177
1178 if (results[output_index] != expected[output_index]) {
1179 error_in_vector = true;
1180 break;
1181 }
1182 }
1183
// Second pass: print the whole vector, but only for the first
// kErrorReportLimit failing vectors. error_count keeps counting beyond that.
1184 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1185 printf("%s\n", name);
1186 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1,
1187 padding, lane_len_in_hex, padding, lane_len_in_hex + 1,
1188 padding);
1189
// Index of the input element reported for lane 0, before wrapping; 16 /
// vn_lane_bytes is the number of Vn lanes in a 128-bit register.
1190 const unsigned first_index_n =
1191 inputs_n_length - (16 / vn_lane_bytes) + n + 1;
1192
// NOTE(review): this loop runs to max(vd_lane_count, vn_lane_count), but
// output_index is laid out with a stride of vd_lane_count. When Vn has more
// lanes than Vd, lanes >= vd_lane_count index past the current result vector
// (and, for the last vector, apparently past the end of results/expected).
// Only reached when a test is already failing - confirm and consider
// bounding the result/expected columns by vd_lane_count.
1193 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);
1194 lane++) {
1195 unsigned output_index = (n * inputs_m_length * vd_lane_count) +
1196 (m * vd_lane_count) + lane;
1197 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;
1198 unsigned input_index_m = m;
1199
// A leading '*' marks the lanes whose result differs from the expectation.
1200 printf(
1201 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64
1202 " "
1203 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1204 results[output_index] != expected[output_index] ? '*' : ' ',
1205 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),
1206 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),
1207 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
1208 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));
1209 }
1210 }
1211 }
1212 }
1213 DCHECK_EQ(d, expected_length);
1214 if (error_count > kErrorReportLimit) {
1215 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1216 }
1217 CHECK(error_count == 0);
1218 }
1219 delete[] results;
1220 }
1221
1222 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====
1223
// Generates and runs a test program for NEON instructions of the form
// <INST> Vd, #imm1, Vn, #imm2 that update their destination.
//
// The emitted code loops over every element of inputs_n. For each element,
// and for every (imm1, imm2) pair, it copies the seed destination value into
// a scratch register, applies 'helper' and stores the scratch register to
// 'results'.
//
//   helper              Pointer to the MacroAssembler member to test; invoked
//                       as (masm.*helper)(vd, imm1, vn, imm2).
//   inputs_d            Address of the data used to seed the destination.
//   inputs_imm1/imm2    Immediate values, read at code-generation time.
//   inputs_n            Address of the input element array.
//   inputs_n_length     Number of input elements (loop bound).
//   results             Address of the output buffer; advanced by
//                       vd.SizeInBytes() after every store.
//   vd_form/vn_form     Vector formats of the operands; neither may be
//                       kFormatUndefined.
1224 void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,
1225 uintptr_t inputs_d, const int inputs_imm1[],
1226 unsigned inputs_imm1_length, uintptr_t inputs_n,
1227 unsigned inputs_n_length,
1228 const int inputs_imm2[],
1229 unsigned inputs_imm2_length, uintptr_t results,
1230 VectorFormat vd_form, VectorFormat vn_form) {
1231 DCHECK_NE(vd_form, kFormatUndefined);
1232 DCHECK_NE(vn_form, kFormatUndefined);
1233
1234 SETUP();
1235 START();
1236
1237 // Roll up the loop to keep the code size down.
1238 Label loop_n;
1239
// General-purpose registers used by the generated code.
1240 Register out = x0;
1241 Register inputs_d_base = x1;
1242 Register inputs_n_base = x2;
1243 Register inputs_n_last_vector = x4;
1244 Register index_n = x6;
1245
1246 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);
1247 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1248
1249 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);
1250 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);
1251 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);
1252 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);
1253 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);
1254
1255 // These will be either a D- or a Q-register form, with a single lane
1256 // (for use in scalar load and store operations).
// NOTE(review): vres is created with vn_bits although it holds the
// destination value (copied from vd, stored with vd.SizeInBytes()).
// Presumably the Vd and Vn forms always have the same register size for the
// instructions tested here - confirm.
1257 VRegister vd = VRegister::Create(0, vd_bits);
1258 VRegister vn = VRegister::Create(1, vn_bits);
1259 VRegister vntmp = VRegister::Create(4, vn_bits);
1260 VRegister vres = VRegister::Create(5, vn_bits);
1261
// Correctly formatted aliases of v1 (vn) and v5 (vres) for calling 'helper'.
1262 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);
1263 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);
1264
1265 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.
1266 VRegister vntmp_single = VRegister::Create(4, vn_lane_bits);
1267
1268 // Same registers for use in the 'ext' instructions.
// The 8B form is chosen when Vn is a D register, the 16B form otherwise.
1269 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();
1270 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();
1271
1272 __ Mov(out, results);
1273
1274 __ Mov(inputs_d_base, inputs_d);
1275
// Point at the last full vector of the input array (vn_lane_count elements
// before its end) and use it as the initial contents of vn.
1276 __ Mov(inputs_n_base, inputs_n);
1277 __ Mov(inputs_n_last_vector,
1278 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));
1279
1280 __ Ldr(vd, MemOperand(inputs_d_base));
1281
1282 __ Ldr(vn, MemOperand(inputs_n_last_vector));
1283
1284 __ Mov(index_n, 0);
1285 __ Bind(&loop_n);
1286
// Load element 'index_n' and shift it into vn: Ext by one lane drops the
// lowest lane of vn and appends the new element at the top.
1287 __ Ldr(vntmp_single,
1288 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));
1289 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);
1290
// These C++ loops are unrolled at code-generation time: for every
// (imm1, imm2) pair the scratch destination is re-seeded from vd, the helper
// instruction is emitted and the result is stored.
1291 for (unsigned i = 0; i < inputs_imm1_length; i++) {
1292 for (unsigned j = 0; j < inputs_imm2_length; j++) {
1293 __ Mov(vres, vd);
1294 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);
1295 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));
1296 }
1297 }
1298
// Repeat while index_n is (unsigned) lower than inputs_n_length.
1299 __ Add(index_n, index_n, 1);
1300 __ Cmp(index_n, inputs_n_length);
1301 __ B(lo, &loop_n);
1302
1303 END();
1304 RUN();
1305 TEARDOWN();
1306 }
1307
1308 // Test NEON instructions. The inputs_*[] and expected[] arrays should be
1309 // arrays of rawbit representation of input values. This ensures that
1310 // exact bit comparisons can be performed.
// Runs an <INST> Vd, #imm1, Vn, #imm2 NEON test and either prints or verifies
// its results.
//
// A results buffer of (inputs_n_length * inputs_imm1_length *
// inputs_imm2_length) vectors, each of LaneCountFromFormat(vd_form) elements
// of type Td, is allocated and filled by TestOpImmOpImmNEON_Helper.
//
// If CcTest::sim_test_trace() is set, the results are printed as a C array
// named kExpected_NEON_<name> followed by kExpectedCount_NEON_<name>.
// Otherwise the results are compared lane by lane with 'expected'; up to
// kErrorReportLimit mismatching vectors are printed and the function CHECKs
// that no mismatch occurred.
//
// 'expected_length' is a count of result vectors (it is CHECKed against
// results_length), not a count of individual lanes.
1311 template <typename Td, typename Tn>
1312 void TestOpImmOpImmNEON(const char* name,
1313 TestOpImmOpImmVdUpdateNEONHelper_t helper,
1314 const Td inputs_d[], const int inputs_imm1[],
1315 unsigned inputs_imm1_length, const Tn inputs_n[],
1316 unsigned inputs_n_length, const int inputs_imm2[],
1317 unsigned inputs_imm2_length, const Td expected[],
1318 unsigned expected_length, VectorFormat vd_form,
1319 VectorFormat vn_form) {
1320 DCHECK_GT(inputs_n_length, 0U);
1321 DCHECK_GT(inputs_imm1_length, 0U);
1322 DCHECK_GT(inputs_imm2_length, 0U);
1323
1324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);
1325
1326 const unsigned results_length =
1327 inputs_n_length * inputs_imm1_length * inputs_imm2_length;
1328
1329 Td* results = new Td[results_length * vd_lane_count];
1330 const unsigned lane_bit = sizeof(Td) * 8;
1331 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();
1332
1333 TestOpImmOpImmNEON_Helper(
1334 helper, reinterpret_cast<uintptr_t>(inputs_d), inputs_imm1,
1335 inputs_imm1_length, reinterpret_cast<uintptr_t>(inputs_n),
1336 inputs_n_length, inputs_imm2, inputs_imm2_length,
1337 reinterpret_cast<uintptr_t>(results), vd_form, vn_form);
1338
1339 if (CcTest::sim_test_trace()) {
1340 // Print the results.
1341 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name);
1342 for (unsigned iteration = 0; iteration < results_length; iteration++) {
1343 printf(" ");
1344 // Output a separate result for each element of the result vector.
1345 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1346 unsigned index = lane + (iteration * vd_lane_count);
1347 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex,
1348 static_cast<uint64_t>(results[index]));
1349 }
1350 printf("\n");
1351 }
1352
1353 printf("};\n");
1354 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name,
1355 results_length);
1356 } else {
1357 // Check the results.
1358 CHECK(expected_length == results_length);
1359 unsigned error_count = 0;
// Counts the visited (n, imm1, imm2) combinations; checked against
// expected_length after the loops.
1360 unsigned counted_length = 0;
// 'padding' supplies the spaces for the "%.*s" column alignment in the report
// header; it must hold at least lane_len_in_hex + 1 characters.
1361 const char* padding = " ";
1362 DCHECK(strlen(padding) >= (lane_len_in_hex + 1));
1363 for (unsigned n = 0; n < inputs_n_length; n++) {
1364 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {
1365 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {
1366 bool error_in_vector = false;
1367
1368 counted_length++;
1369
// First pass: detect whether any lane of this result vector differs.
1370 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1371 unsigned output_index =
1372 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +
1373 (imm1 * inputs_imm2_length * vd_lane_count) +
1374 (imm2 * vd_lane_count) + lane;
1375
1376 if (results[output_index] != expected[output_index]) {
1377 error_in_vector = true;
1378 break;
1379 }
1380 }
1381
// Second pass: print the whole vector, but only for the first
// kErrorReportLimit failing vectors. error_count keeps counting beyond that.
1382 if (error_in_vector && (++error_count <= kErrorReportLimit)) {
1383 printf("%s\n", name);
1384 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n",
1385 lane_len_in_hex + 1, padding, lane_len_in_hex, padding,
1386 lane_len_in_hex + 1, padding, lane_len_in_hex, padding,
1387 lane_len_in_hex + 1, padding);
1388
1389 for (unsigned lane = 0; lane < vd_lane_count; lane++) {
1390 unsigned output_index =
1391 (n * inputs_imm1_length * inputs_imm2_length *
1392 vd_lane_count) +
1393 (imm1 * inputs_imm2_length * vd_lane_count) +
1394 (imm2 * vd_lane_count) + lane;
// Reconstruct which input element is reported for this lane, wrapping around
// the input array.
1395 unsigned input_index_n =
1396 (inputs_n_length - vd_lane_count + n + 1 + lane) %
1397 inputs_n_length;
1398 unsigned input_index_imm1 = imm1;
1399 unsigned input_index_imm2 = imm2;
1400
// A leading '*' marks the lanes whose result differs from the expectation.
// The int immediates are converted to uint64_t for printing with PRIx64.
1401 printf(
1402 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64
1403 " "
1404 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n",
1405 results[output_index] != expected[output_index] ? '*' : ' ',
1406 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),
1407 lane_len_in_hex,
1408 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),
1409 lane_len_in_hex,
1410 static_cast<uint64_t>(inputs_n[input_index_n]),
1411 lane_len_in_hex,
1412 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),
1413 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),
1414 lane_len_in_hex,
1415 static_cast<uint64_t>(expected[output_index]));
1416 }
1417 }
1418 }
1419 }
1420 }
1421 DCHECK_EQ(counted_length, expected_length);
1422 if (error_count > kErrorReportLimit) {
1423 printf("%u other errors follow.\n", error_count - kErrorReportLimit);
1424 }
1425 CHECK(error_count == 0);
1426 }
1427 delete[] results;
1428 }
1429
1430 } // anonymous namespace
1431
1432 // ==== NEON Tests. ====
1433
// Calls Test1OpNEON for 'mnemonic' with destination form 'vdform' and source
// form 'vnform'. The test name is STRINGIFY(mnemonic) "_" STRINGIFY(vdform),
// the instruction is &MacroAssembler::mnemonic, and the expected data are the
// symbols kExpected_NEON_<mnemonic>_<vdform> and
// kExpectedCount_NEON_<mnemonic>_<vdform>.
// 'input_n' must be an actual array (not a pointer): its element count is
// computed with sizeof(input_n) / sizeof(input_n[0]).
1434 #define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
1435 Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
1436 &MacroAssembler::mnemonic, input_n, \
1437 (sizeof(input_n) / sizeof(input_n[0])), \
1438 kExpected_NEON_##mnemonic##_##vdform, \
1439 kExpectedCount_NEON_##mnemonic##_##vdform, kFormat##vdform, \
1440 kFormat##vnform)
1441
// Calls Test1OpAcrossNEON for 'mnemonic'. Unlike CALL_TEST_NEON_HELPER_1Op,
// both forms are part of the test name and of the expected-data symbols:
// kExpected_NEON_<mnemonic>_<vdform>_<vnform> and
// kExpectedCount_NEON_<mnemonic>_<vdform>_<vnform>.
// 'input_n' must be an actual array; its element count is computed with
// sizeof.
1442 #define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n) \
1443 Test1OpAcrossNEON( \
1444 STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(vnform), \
1445 &MacroAssembler::mnemonic, input_n, \
1446 (sizeof(input_n) / sizeof(input_n[0])), \
1447 kExpected_NEON_##mnemonic##_##vdform##_##vnform, \
1448 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform, kFormat##vdform, \
1449 kFormat##vnform)
1450
// Calls Test2OpNEON for 'mnemonic' with a destination seed array 'input_d'
// and two input arrays 'input_n' and 'input_m'. The test name and the
// expected-data symbols (kExpected_NEON_<mnemonic>_<vdform>,
// kExpectedCount_NEON_<mnemonic>_<vdform>) are keyed on the destination form
// only. 'input_n' and 'input_m' must be actual arrays; their element counts
// are computed with sizeof.
1451 #define CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, input_d, \
1452 input_n, input_m) \
1453 Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), \
1454 &MacroAssembler::mnemonic, input_d, input_n, \
1455 (sizeof(input_n) / sizeof(input_n[0])), input_m, \
1456 (sizeof(input_m) / sizeof(input_m[0])), \
1457 kExpected_NEON_##mnemonic##_##vdform, \
1458 kExpectedCount_NEON_##mnemonic##_##vdform, kFormat##vdform, \
1459 kFormat##vnform)
1460
// Calls Test2OpImmNEON for 'mnemonic', where 'input_m' is the array of
// immediates. The test name and the expected-data symbols carry a "_2OPIMM"
// suffix: kExpected_NEON_<mnemonic>_<vdform>_2OPIMM and
// kExpectedCount_NEON_<mnemonic>_<vdform>_2OPIMM.
// 'input_n' and 'input_m' must be actual arrays; their element counts are
// computed with sizeof.
1461 #define CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, input_n, \
1462 input_m) \
1463 Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM", \
1464 &MacroAssembler::mnemonic, input_n, \
1465 (sizeof(input_n) / sizeof(input_n[0])), input_m, \
1466 (sizeof(input_m) / sizeof(input_m[0])), \
1467 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM, \
1468 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM, \
1469 kFormat##vdform, kFormat##vnform)
1470
// Calls TestByElementNEON for 'mnemonic'. All three forms are part of the
// test name and of the expected-data symbols:
// kExpected_NEON_<mnemonic>_<vdform>_<vnform>_<vmform> and
// kExpectedCount_NEON_<mnemonic>_<vdform>_<vnform>_<vmform>.
// 'input_n', 'input_m' and 'indices' must be actual arrays; their element
// counts are computed with sizeof.
1471 #define CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, \
1472 input_d, input_n, input_m, indices) \
1473 TestByElementNEON( \
1474 STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY( \
1475 vnform) "_" STRINGIFY(vmform), \
1476 &MacroAssembler::mnemonic, input_d, input_n, \
1477 (sizeof(input_n) / sizeof(input_n[0])), input_m, \
1478 (sizeof(input_m) / sizeof(input_m[0])), indices, \
1479 (sizeof(indices) / sizeof(indices[0])), \
1480 kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
1481 kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform, \
1482 kFormat##vdform, kFormat##vnform, kFormat##vmform)
1483
// Calls TestOpImmOpImmNEON. Unlike the other CALL_TEST_NEON_HELPER_* macros,
// the function under test is passed explicitly as 'helper' rather than being
// derived from 'mnemonic'; 'mnemonic' is only used for the test name and for
// the expected-data symbols kExpected_NEON_<mnemonic>_<vdform> and
// kExpectedCount_NEON_<mnemonic>_<vdform>.
// 'input_imm1', 'input_n' and 'input_imm2' must be actual arrays; their
// element counts are computed with sizeof.
1484 #define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, mnemonic, vdform, vnform, \
1485 input_d, input_imm1, input_n, \
1486 input_imm2) \
1487 TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform), helper, \
1488 input_d, input_imm1, \
1489 (sizeof(input_imm1) / sizeof(input_imm1[0])), input_n, \
1490 (sizeof(input_n) / sizeof(input_n[0])), input_imm2, \
1491 (sizeof(input_imm2) / sizeof(input_imm2[0])), \
1492 kExpected_NEON_##mnemonic##_##vdform, \
1493 kExpectedCount_NEON_##mnemonic##_##vdform, \
1494 kFormat##vdform, kFormat##vnform)
1495
// One-operand test where destination and source use the same form 'variant'.
1496 #define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input) \
1497 CALL_TEST_NEON_HELPER_1Op(mnemonic, variant, variant, input)
1498
// Defines SIMTESTs <mnemonic>_8B and <mnemonic>_16B, both fed from the array
// kInput8bits<input>.
1499 #define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
1500 SIMTEST(mnemonic##_8B) { \
1501 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input); \
1502 } \
1503 SIMTEST(mnemonic##_16B) { \
1504 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input); \
1505 }
1506
// Defines SIMTESTs <mnemonic>_4H and <mnemonic>_8H, both fed from the array
// kInput16bits<input>.
1507 #define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input) \
1508 SIMTEST(mnemonic##_4H) { \
1509 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input); \
1510 } \
1511 SIMTEST(mnemonic##_8H) { \
1512 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input); \
1513 }
1514
// Defines SIMTESTs <mnemonic>_2S and <mnemonic>_4S, both fed from the array
// kInput32bits<input>.
1515 #define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
1516 SIMTEST(mnemonic##_2S) { \
1517 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input); \
1518 } \
1519 SIMTEST(mnemonic##_4S) { \
1520 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input); \
1521 }
1522
// Byte and halfword forms only: 8B, 16B, 4H, 8H.
1523 #define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
1524 DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input) \
1525 DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)
1526
// Every vector form except 2D: 8B, 16B, 4H, 8H, 2S, 4S.
1527 #define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
1528 DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
1529 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
1530
// All forms of DEFINE_TEST_NEON_2SAME_NO2D plus <mnemonic>_2D, which is fed
// from kInput64bits<input>.
1531 #define DEFINE_TEST_NEON_2SAME(mnemonic, input) \
1532 DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
1533 SIMTEST(mnemonic##_2D) { \
1534 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
1535 }
// Word and doubleword forms only: 2S, 4S and 2D.
1536 #define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input) \
1537 DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input) \
1538 SIMTEST(mnemonic##_2D) { \
1539 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input); \
1540 }
1541
// Floating-point vector variants: defines SIMTESTs <mnemonic>_2S and
// <mnemonic>_4S fed from kInputFloat<input>, and <mnemonic>_2D fed from
// kInputDouble<input>.
1542 #define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input) \
1543 SIMTEST(mnemonic##_2S) { \
1544 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input); \
1545 } \
1546 SIMTEST(mnemonic##_4S) { \
1547 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input); \
1548 } \
1549 SIMTEST(mnemonic##_2D) { \
1550 CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input); \
1551 }
1552
// Floating-point scalar variants: defines SIMTESTs <mnemonic>_S (fed from
// kInputFloat<input>) and <mnemonic>_D (fed from kInputDouble<input>).
1553 #define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input) \
1554 SIMTEST(mnemonic##_S) { \
1555 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \
1556 } \
1557 SIMTEST(mnemonic##_D) { \
1558 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \
1559 }
1560
// Integer scalar variants. Each of the four macros below defines a single
// SIMTEST for one scalar size, fed from the integer input array of the
// matching width (kInput8bits / kInput16bits / kInput32bits / kInput64bits,
// suffixed with 'input').

// Defines SIMTEST <mnemonic>_B.
1561 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
1562 SIMTEST(mnemonic##_B) { \
1563 CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \
1564 }
// Defines SIMTEST <mnemonic>_H.
1565 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
1566 SIMTEST(mnemonic##_H) { \
1567 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \
1568 }
// Defines SIMTEST <mnemonic>_S.
1569 #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
1570 SIMTEST(mnemonic##_S) { \
1571 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \
1572 }
// Defines SIMTEST <mnemonic>_D.
1573 #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \
1574 SIMTEST(mnemonic##_D) { \
1575 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \
1576 }
1577
// All four integer scalar sizes: B, H, S and D.
1578 #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \
1579 DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \
1580 DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \
1581 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
1582 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
1583
// Integer scalar sizes S and D only.
1584 #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \
1585 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \
1586 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input)
1587
// Thin alias for CALL_TEST_NEON_HELPER_1OpAcross.
1588 #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \
1589 CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n)
1590
// Defines across-lanes SIMTESTs whose scalar destination has the same lane
// size as the source vector: B from 8B/16B, H from 4H/8H and S from 4S.
// Test names have the form <mnemonic>_<vd>_<vn>.
1591 #define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \
1592 SIMTEST(mnemonic##_B_8B) { \
1593 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \
1594 } \
1595 SIMTEST(mnemonic##_B_16B) { \
1596 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \
1597 } \
1598 SIMTEST(mnemonic##_H_4H) { \
1599 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \
1600 } \
1601 SIMTEST(mnemonic##_H_8H) { \
1602 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \
1603 } \
1604 SIMTEST(mnemonic##_S_4S) { \
1605 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \
1606 }
1607
// Defines across-lanes SIMTESTs whose scalar destination is twice as wide as
// the source lanes: H from 8B/16B, S from 4H/8H and D from 4S.
1608 #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \
1609 SIMTEST(mnemonic##_H_8B) { \
1610 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \
1611 } \
1612 SIMTEST(mnemonic##_H_16B) { \
1613 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \
1614 } \
1615 SIMTEST(mnemonic##_S_4H) { \
1616 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \
1617 } \
1618 SIMTEST(mnemonic##_S_8H) { \
1619 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \
1620 } \
1621 SIMTEST(mnemonic##_D_4S) { \
1622 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \
1623 }
1624
// Floating-point across-lanes test: a single SIMTEST <mnemonic>_S_4S fed from
// kInputFloat<input>.
1625 #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \
1626 SIMTEST(mnemonic##_S_4S) { \
1627 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \
1628 }
1629
// One-operand test where destination and source use different forms. Thin
// alias for CALL_TEST_NEON_HELPER_1Op, so tests are named after 'vdform'.
1630 #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \
1631 CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n)
1632
// Defines SIMTESTs for forms where the destination has half as many lanes as
// the source, each twice as wide: 4H<-8B, 8H<-16B, 2S<-4H, 4S<-8H, 1D<-2S
// and 2D<-4S. The input array width matches the source lane size.
1633 #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \
1634 SIMTEST(mnemonic##_4H) { \
1635 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \
1636 } \
1637 SIMTEST(mnemonic##_8H) { \
1638 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \
1639 } \
1640 SIMTEST(mnemonic##_2S) { \
1641 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \
1642 } \
1643 SIMTEST(mnemonic##_4S) { \
1644 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \
1645 } \
1646 SIMTEST(mnemonic##_1D) { \
1647 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \
1648 } \
1649 SIMTEST(mnemonic##_2D) { \
1650 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \
1651 }
1652
// Defines SIMTESTs for narrowing forms: 8B<-8H, 4H<-4S and 2S<-2D for
// 'mnemonic', plus 16B<-8H, 8H<-4S and 4S<-2D for the instruction named
// <mnemonic>2 (the literal '2' is pasted onto the mnemonic). The input array
// width matches the source lane size.
1653 #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \
1654 SIMTEST(mnemonic##_8B) { \
1655 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \
1656 } \
1657 SIMTEST(mnemonic##_4H) { \
1658 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \
1659 } \
1660 SIMTEST(mnemonic##_2S) { \
1661 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \
1662 } \
1663 SIMTEST(mnemonic##2_16B) { \
1664 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \
1665 } \
1666 SIMTEST(mnemonic##2_8H) { \
1667 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \
1668 } \
1669 SIMTEST(mnemonic##2_4S) { \
1670 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \
1671 }
1672
1673 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \
1674 SIMTEST(mnemonic##_4S) { \
1675 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \
1676 } \
1677 SIMTEST(mnemonic##_2D) { \
1678 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \
1679 } \
1680 SIMTEST(mnemonic##2_4S) { \
1681 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \
1682 } \
1683 SIMTEST(mnemonic##2_2D) { \
1684 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \
1685 }
1686
// Defines four floating-point SIMTEST cases for `mnemonic`: _4H and _2S, plus
// _8H and _4S for the `mnemonic##2` variant. The first form has lanes half as
// wide as the second; inputs come from kInputFloat##input (S-lane sources) or
// kInputDouble##input (D-lane sources).
1687 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \
1688 SIMTEST(mnemonic##_4H) { \
1689 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \
1690 } \
1691 SIMTEST(mnemonic##_2S) { \
1692 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
1693 } \
1694 SIMTEST(mnemonic##2_8H) { \
1695 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \
1696 } \
1697 SIMTEST(mnemonic##2_4S) { \
1698 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
1699 }
1700
// Subset of the FP narrowing tests that only covers 2D sources: defines
// mnemonic##_2S and mnemonic##2_4S, both fed from kInputDouble##input.
1701 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \
1702 SIMTEST(mnemonic##_2S) { \
1703 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \
1704 } \
1705 SIMTEST(mnemonic##2_4S) { \
1706 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \
1707 }
1708
// Scalar counterpart of the narrowing tests: defines mnemonic##_B, _H and _S.
// Each pairs a scalar form with the next wider scalar form (B/H, H/S, S/D) and
// uses the kInput{16,32,64}bits##input list matching the wider form.
1709 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \
1710 SIMTEST(mnemonic##_B) { \
1711 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \
1712 } \
1713 SIMTEST(mnemonic##_H) { \
1714 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \
1715 } \
1716 SIMTEST(mnemonic##_S) { \
1717 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \
1718 }
1719
// Defines mnemonic##_S and mnemonic##_D. Each pairs a scalar form (S or D)
// with a two-lane vector form of the same lane width (2S or 2D), using the
// kInputFloat##input or kInputDouble##input list respectively.
1720 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \
1721 SIMTEST(mnemonic##_S) { \
1722 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \
1723 } \
1724 SIMTEST(mnemonic##_D) { \
1725 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \
1726 }
1727
// Convenience wrapper for instructions whose three operands share one form:
// forwards to CALL_TEST_NEON_HELPER_2Op with `variant` repeated for all three
// form arguments and `input_nm` passed for both source-input arguments.
// `input_d` is forwarded unchanged.
1728 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \
1729 { \
1730 CALL_TEST_NEON_HELPER_2Op(mnemonic, variant, variant, variant, input_d, \
1731 input_nm, input_nm); \
1732 }
1733
// Defines mnemonic##_8B and mnemonic##_16B three-same tests. Both use
// kInput8bitsAccDestination as the destination input and kInput8bits##input
// for the two sources.
1734 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
1735 SIMTEST(mnemonic##_8B) { \
1736 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, kInput8bitsAccDestination, \
1737 kInput8bits##input); \
1738 } \
1739 SIMTEST(mnemonic##_16B) { \
1740 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, kInput8bitsAccDestination, \
1741 kInput8bits##input); \
1742 }
1743
// Defines three-same tests for the H- and S-lane vector forms only:
// mnemonic##_4H, _8H, _2S and _4S. The destination input is
// kInput{16,32}bitsAccDestination and the sources are kInput{16,32}bits##input.
1744 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \
1745 SIMTEST(mnemonic##_4H) { \
1746 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInput16bitsAccDestination, \
1747 kInput16bits##input); \
1748 } \
1749 SIMTEST(mnemonic##_8H) { \
1750 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInput16bitsAccDestination, \
1751 kInput16bits##input); \
1752 } \
1753 SIMTEST(mnemonic##_2S) { \
1754 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInput32bitsAccDestination, \
1755 kInput32bits##input); \
1756 } \
1757 SIMTEST(mnemonic##_4S) { \
1758 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInput32bitsAccDestination, \
1759 kInput32bits##input); \
1760 }
1761
// Three-same tests for every integer vector form except 2D: expands to the
// 8B/16B set followed by the 4H/8H/2S/4S set.
1762 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
1763 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \
1764 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input)
1765
// Three-same tests for all integer vector forms: everything from
// DEFINE_TEST_NEON_3SAME_NO2D plus mnemonic##_2D, which uses
// kInput64bitsAccDestination and kInput64bits##input.
1766 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \
1767 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \
1768 SIMTEST(mnemonic##_2D) { \
1769 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInput64bitsAccDestination, \
1770 kInput64bits##input); \
1771 }
1772
// Floating-point three-same tests: mnemonic##_2S and _4S use
// kInputFloatAccDestination / kInputFloat##input, and mnemonic##_2D uses
// kInputDoubleAccDestination / kInputDouble##input.
1773 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \
1774 SIMTEST(mnemonic##_2S) { \
1775 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInputFloatAccDestination, \
1776 kInputFloat##input); \
1777 } \
1778 SIMTEST(mnemonic##_4S) { \
1779 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInputFloatAccDestination, \
1780 kInputFloat##input); \
1781 } \
1782 SIMTEST(mnemonic##_2D) { \
1783 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInputDoubleAccDestination, \
1784 kInputDouble##input); \
1785 }
1786
// Scalar three-same test for the D form only: defines mnemonic##_D using
// kInput64bitsAccDestination and kInput64bits##input.
1787 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \
1788 SIMTEST(mnemonic##_D) { \
1789 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \
1790 kInput64bits##input); \
1791 }
1792
// Scalar three-same tests for the H and S forms only: defines mnemonic##_H
// and mnemonic##_S with the 16-bit and 32-bit input lists respectively.
1793 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
1794 SIMTEST(mnemonic##_H) { \
1795 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \
1796 kInput16bits##input); \
1797 } \
1798 SIMTEST(mnemonic##_S) { \
1799 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \
1800 kInput32bits##input); \
1801 }
1802
// Scalar three-same tests for all four scalar forms: defines mnemonic##_B,
// _H, _S and _D, each with the AccDestination and ##input lists of the
// matching bit width (8, 16, 32, 64).
// NOTE(review): the _H/_S/_D names coincide with those produced by
// DEFINE_TEST_NEON_3SAME_SCALAR_HS and _SCALAR_D, so presumably only one of
// these macros may be used per mnemonic -- confirm.
1803 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
1804 SIMTEST(mnemonic##_B) { \
1805 CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, kInput8bitsAccDestination, \
1806 kInput8bits##input); \
1807 } \
1808 SIMTEST(mnemonic##_H) { \
1809 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \
1810 kInput16bits##input); \
1811 } \
1812 SIMTEST(mnemonic##_S) { \
1813 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \
1814 kInput32bits##input); \
1815 } \
1816 SIMTEST(mnemonic##_D) { \
1817 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \
1818 kInput64bits##input); \
1819 }
1820
// Floating-point scalar three-same tests: mnemonic##_S uses the Float lists
// and mnemonic##_D uses the Double lists.
1821 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
1822 SIMTEST(mnemonic##_S) { \
1823 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInputFloatAccDestination, \
1824 kInputFloat##input); \
1825 } \
1826 SIMTEST(mnemonic##_D) { \
1827 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInputDoubleAccDestination, \
1828 kInputDouble##input); \
1829 }
1830
// Wrapper for three-operand instructions whose operand forms differ: forwards
// every argument unchanged, in the same order, to CALL_TEST_NEON_HELPER_2Op.
// Parameters: the d/n/m operand forms followed by the d/n/m input lists.
1831 #define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, vdform, vnform, vmform, input_d, \
1832 input_n, input_m) \
1833 { \
1834 CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, input_d, \
1835 input_n, input_m); \
1836 }
1837
// Long (widening) three-different tests producing 8H: mnemonic##_8H reads 8B
// sources and mnemonic##2_8H reads 16B sources. Destination input is
// kInput16bitsAccDestination; both sources use kInput8bits##input.
1838 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
1839 SIMTEST(mnemonic##_8H) { \
1840 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \
1841 kInput16bitsAccDestination, \
1842 kInput8bits##input, kInput8bits##input); \
1843 } \
1844 SIMTEST(mnemonic##2_8H) { \
1845 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \
1846 kInput16bitsAccDestination, \
1847 kInput8bits##input, kInput8bits##input); \
1848 }
1849
// Long (widening) three-different tests producing 4S: mnemonic##_4S reads 4H
// sources and mnemonic##2_4S reads 8H sources. Destination input is
// kInput32bitsAccDestination; both sources use kInput16bits##input.
1850 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
1851 SIMTEST(mnemonic##_4S) { \
1852 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \
1853 kInput32bitsAccDestination, \
1854 kInput16bits##input, kInput16bits##input); \
1855 } \
1856 SIMTEST(mnemonic##2_4S) { \
1857 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \
1858 kInput32bitsAccDestination, \
1859 kInput16bits##input, kInput16bits##input); \
1860 }
1861
// Long (widening) three-different tests producing 2D: mnemonic##_2D reads 2S
// sources and mnemonic##2_2D reads 4S sources. Destination input is
// kInput64bitsAccDestination; both sources use kInput32bits##input.
1862 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
1863 SIMTEST(mnemonic##_2D) { \
1864 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \
1865 kInput64bitsAccDestination, \
1866 kInput32bits##input, kInput32bits##input); \
1867 } \
1868 SIMTEST(mnemonic##2_2D) { \
1869 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \
1870 kInput64bitsAccDestination, \
1871 kInput32bits##input, kInput32bits##input); \
1872 }
1873
// Long three-different tests for the 4S and 2D destinations only (no 8H).
1874 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
1875 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
1876 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
1877
// Long three-different tests for all three destinations: 8H, 4S and 2D.
1878 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
1879 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
1880 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
1881 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)
1882
// Scalar long test with an S destination and H sources: defines mnemonic##_S
// using kInput32bitsAccDestination and kInput16bits##input for both sources.
1883 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
1884 SIMTEST(mnemonic##_S) { \
1885 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, kInput32bitsAccDestination, \
1886 kInput16bits##input, kInput16bits##input); \
1887 }
1888
// Scalar long test with a D destination and S sources: defines mnemonic##_D
// using kInput64bitsAccDestination and kInput32bits##input for both sources.
1889 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
1890 SIMTEST(mnemonic##_D) { \
1891 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, kInput64bitsAccDestination, \
1892 kInput32bits##input, kInput32bits##input); \
1893 }
1894
// Scalar long tests for both the S and the D destination.
1895 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
1896 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
1897 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)
1898
// Wide three-different tests: the destination and first source share the wide
// form (8H, 4S or 2D) while the second source uses lanes half as wide. Defines
// mnemonic##_8H/_4S/_2D and the same names for `mnemonic##2`, whose second
// source is the 16B/8H/4S form. The first source uses the wide-width ##input
// list and the second source the half-width ##input list.
1899 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
1900 SIMTEST(mnemonic##_8H) { \
1901 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \
1902 kInput16bitsAccDestination, \
1903 kInput16bits##input, kInput8bits##input); \
1904 } \
1905 SIMTEST(mnemonic##_4S) { \
1906 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \
1907 kInput32bitsAccDestination, \
1908 kInput32bits##input, kInput16bits##input); \
1909 } \
1910 SIMTEST(mnemonic##_2D) { \
1911 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \
1912 kInput64bitsAccDestination, \
1913 kInput64bits##input, kInput32bits##input); \
1914 } \
1915 SIMTEST(mnemonic##2_8H) { \
1916 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \
1917 kInput16bitsAccDestination, \
1918 kInput16bits##input, kInput8bits##input); \
1919 } \
1920 SIMTEST(mnemonic##2_4S) { \
1921 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \
1922 kInput32bitsAccDestination, \
1923 kInput32bits##input, kInput16bits##input); \
1924 } \
1925 SIMTEST(mnemonic##2_2D) { \
1926 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \
1927 kInput64bitsAccDestination, \
1928 kInput64bits##input, kInput32bits##input); \
1929 }
1930
// Narrowing three-different tests: both sources share a wide form (8H, 4S or
// 2D) and the destination uses lanes half as wide. Defines
// mnemonic##_8B/_4H/_2S and mnemonic##2_16B/_8H/_4S. The destination input is
// the AccDestination list of the narrow width; both sources use the
// wide-width ##input list.
1931 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
1932 SIMTEST(mnemonic##_8B) { \
1933 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \
1934 kInput8bitsAccDestination, \
1935 kInput16bits##input, kInput16bits##input); \
1936 } \
1937 SIMTEST(mnemonic##_4H) { \
1938 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \
1939 kInput16bitsAccDestination, \
1940 kInput32bits##input, kInput32bits##input); \
1941 } \
1942 SIMTEST(mnemonic##_2S) { \
1943 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \
1944 kInput32bitsAccDestination, \
1945 kInput64bits##input, kInput64bits##input); \
1946 } \
1947 SIMTEST(mnemonic##2_16B) { \
1948 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \
1949 kInput8bitsAccDestination, \
1950 kInput16bits##input, kInput16bits##input); \
1951 } \
1952 SIMTEST(mnemonic##2_8H) { \
1953 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \
1954 kInput16bitsAccDestination, \
1955 kInput32bits##input, kInput32bits##input); \
1956 } \
1957 SIMTEST(mnemonic##2_4S) { \
1958 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \
1959 kInput32bitsAccDestination, \
1960 kInput64bits##input, kInput64bits##input); \
1961 }
1962
// Upper-case alias used by the DEFINE_TEST_NEON_2OPIMM* macros: forwards all
// arguments unchanged to CALL_TEST_NEON_HELPER_2OpImm inside a braced block.
// Because the expansion is a compound statement, a trailing `;` at the call
// site is optional.
1963 #define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, vdform, vnform, input_n, \
1964 input_imm) \
1965 { \
1966 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, input_n, \
1967 input_imm); \
1968 }
1969
// Defines operand-plus-immediate tests for all seven integer vector forms
// (8B, 16B, 4H, 8H, 2S, 4S, 2D), named mnemonic##_<form>_2OPIMM. Destination
// and source share the form; the source list is kInput<N>bits##input and the
// immediate list is kInput<N>bitsImm##input_imm for the lane width N.
1970 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
1971 SIMTEST(mnemonic##_8B_2OPIMM) { \
1972 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, kInput8bits##input, \
1973 kInput8bitsImm##input_imm); \
1974 } \
1975 SIMTEST(mnemonic##_16B_2OPIMM) { \
1976 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, kInput8bits##input, \
1977 kInput8bitsImm##input_imm); \
1978 } \
1979 SIMTEST(mnemonic##_4H_2OPIMM) { \
1980 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, \
1981 kInput16bitsImm##input_imm); \
1982 } \
1983 SIMTEST(mnemonic##_8H_2OPIMM) { \
1984 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, \
1985 kInput16bitsImm##input_imm); \
1986 } \
1987 SIMTEST(mnemonic##_2S_2OPIMM) { \
1988 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
1989 kInput32bitsImm##input_imm); \
1990 } \
1991 SIMTEST(mnemonic##_4S_2OPIMM) { \
1992 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
1993 kInput32bitsImm##input_imm); \
1994 } \
1995 SIMTEST(mnemonic##_2D_2OPIMM) { \
1996 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
1997 kInput64bitsImm##input_imm); \
1998 }
1999
// Like DEFINE_TEST_NEON_2OPIMM, but the source is passed as the scalar form
// (B, H, S or D) of the destination vector's lane width instead of a vector
// form. Test names are the same mnemonic##_<vector form>_2OPIMM names, so the
// two macros cannot both be used for one mnemonic.
2000 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
2001 SIMTEST(mnemonic##_8B_2OPIMM) { \
2002 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, kInput8bits##input, \
2003 kInput8bitsImm##input_imm); \
2004 } \
2005 SIMTEST(mnemonic##_16B_2OPIMM) { \
2006 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, kInput8bits##input, \
2007 kInput8bitsImm##input_imm); \
2008 } \
2009 SIMTEST(mnemonic##_4H_2OPIMM) { \
2010 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, kInput16bits##input, \
2011 kInput16bitsImm##input_imm); \
2012 } \
2013 SIMTEST(mnemonic##_8H_2OPIMM) { \
2014 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, kInput16bits##input, \
2015 kInput16bitsImm##input_imm); \
2016 } \
2017 SIMTEST(mnemonic##_2S_2OPIMM) { \
2018 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, kInput32bits##input, \
2019 kInput32bitsImm##input_imm); \
2020 } \
2021 SIMTEST(mnemonic##_4S_2OPIMM) { \
2022 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, kInput32bits##input, \
2023 kInput32bitsImm##input_imm); \
2024 } \
2025 SIMTEST(mnemonic##_2D_2OPIMM) { \
2026 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, kInput64bits##input, \
2027 kInput64bitsImm##input_imm); \
2028 }
2029
// Narrowing operand-plus-immediate tests: mnemonic##_8B/_4H/_2S_2OPIMM and
// mnemonic##2_16B/_8H/_4S_2OPIMM. The source form has lanes twice as wide as
// the destination. The source list follows the source width
// (kInput{16,32,64}bits##input) while the immediate list follows the
// destination width (kInput{8,16,32}bitsImm##input_imm).
2030 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
2031 SIMTEST(mnemonic##_8B_2OPIMM) { \
2032 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H, kInput16bits##input, \
2033 kInput8bitsImm##input_imm); \
2034 } \
2035 SIMTEST(mnemonic##_4H_2OPIMM) { \
2036 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S, kInput32bits##input, \
2037 kInput16bitsImm##input_imm); \
2038 } \
2039 SIMTEST(mnemonic##_2S_2OPIMM) { \
2040 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D, kInput64bits##input, \
2041 kInput32bitsImm##input_imm); \
2042 } \
2043 SIMTEST(mnemonic##2_16B_2OPIMM) { \
2044 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H, kInput16bits##input, \
2045 kInput8bitsImm##input_imm); \
2046 } \
2047 SIMTEST(mnemonic##2_8H_2OPIMM) { \
2048 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S, kInput32bits##input, \
2049 kInput16bitsImm##input_imm); \
2050 } \
2051 SIMTEST(mnemonic##2_4S_2OPIMM) { \
2052 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D, kInput64bits##input, \
2053 kInput32bitsImm##input_imm); \
2054 }
2055
// Scalar narrowing operand-plus-immediate tests: mnemonic##_B/_H/_S_2OPIMM
// with source forms H/S/D. As in the vector variant, the source list follows
// the source width and the immediate list follows the destination width.
2056 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
2057 SIMTEST(mnemonic##_B_2OPIMM) { \
2058 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H, kInput16bits##input, \
2059 kInput8bitsImm##input_imm); \
2060 } \
2061 SIMTEST(mnemonic##_H_2OPIMM) { \
2062 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S, kInput32bits##input, \
2063 kInput16bitsImm##input_imm); \
2064 } \
2065 SIMTEST(mnemonic##_S_2OPIMM) { \
2066 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D, kInput64bits##input, \
2067 kInput32bitsImm##input_imm); \
2068 }
2069
// Floating-point operand-plus-immediate tests for the 2S, 4S and 2D forms in
// which the immediate list is always kInputDoubleImm##input_imm, including
// for the single-precision forms.
// NOTE(review): the 2S case pastes the literal token `Basic`
// (kInputFloat##Basic -> kInputFloatBasic) and therefore ignores `input`,
// unlike the 4S case which uses kInputFloat##input. This looks unintended,
// but changing it would presumably require regenerating the expected results
// for the 2S tests -- confirm before touching.
// The 2S call also has no trailing `;`; that still parses because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
2070 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
2071 SIMTEST(mnemonic##_2S_2OPIMM) { \
2072 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \
2073 kInputDoubleImm##input_imm) \
2074 } \
2075 SIMTEST(mnemonic##_4S_2OPIMM) { \
2076 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \
2077 kInputDoubleImm##input_imm); \
2078 } \
2079 SIMTEST(mnemonic##_2D_2OPIMM) { \
2080 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \
2081 kInputDoubleImm##input_imm); \
2082 }
2083
// Floating-point operand-plus-immediate tests for the 2S, 4S and 2D forms.
// Sources come from the Float/Double lists; immediates come from the integer
// kInput{32,64}bitsImm##input_imm lists of the matching lane width.
// NOTE(review): the 2S case pastes the literal token `Basic`
// (kInputFloatBasic) and ignores `input`, whereas 4S uses kInputFloat##input.
// Possibly unintended; changing it would presumably invalidate the recorded
// expectations for the 2S tests -- confirm.
// None of the calls has a trailing `;`, which is fine because the helper
// expands to a braced block.
2084 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
2085 SIMTEST(mnemonic##_2S_2OPIMM) { \
2086 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \
2087 kInput32bitsImm##input_imm) \
2088 } \
2089 SIMTEST(mnemonic##_4S_2OPIMM) { \
2090 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \
2091 kInput32bitsImm##input_imm) \
2092 } \
2093 SIMTEST(mnemonic##_2D_2OPIMM) { \
2094 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \
2095 kInput64bitsImm##input_imm) \
2096 }
2097
// Scalar floating-point operand-plus-immediate tests: mnemonic##_S_2OPIMM and
// mnemonic##_D_2OPIMM, with integer kInput{32,64}bitsImm##input_imm
// immediates.
// NOTE(review): the S case pastes the literal token `Basic`
// (kInputFloatBasic) and ignores `input`, while the D case uses
// kInputDouble##input. Possibly unintended -- confirm against the recorded
// expectations before changing.
2098 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
2099 SIMTEST(mnemonic##_S_2OPIMM) { \
2100 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##Basic, \
2101 kInput32bitsImm##input_imm) \
2102 } \
2103 SIMTEST(mnemonic##_D_2OPIMM) { \
2104 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
2105 kInput64bitsImm##input_imm) \
2106 }
2107
// Operand-plus-immediate tests restricted to the 2S, 4S and 2D forms, using
// the integer kInput{32,64}bits##input source lists and the matching
// kInput{32,64}bitsImm##input_imm immediate lists.
2108 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
2109 SIMTEST(mnemonic##_2S_2OPIMM) { \
2110 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \
2111 kInput32bitsImm##input_imm); \
2112 } \
2113 SIMTEST(mnemonic##_4S_2OPIMM) { \
2114 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \
2115 kInput32bitsImm##input_imm); \
2116 } \
2117 SIMTEST(mnemonic##_2D_2OPIMM) { \
2118 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \
2119 kInput64bitsImm##input_imm); \
2120 }
2121
// Scalar operand-plus-immediate test for the D form only: defines
// mnemonic##_D_2OPIMM with the 64-bit source and immediate lists.
2122 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
2123 SIMTEST(mnemonic##_D_2OPIMM) { \
2124 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, \
2125 kInput64bitsImm##input_imm); \
2126 }
2127
// Scalar operand-plus-immediate tests for the S and D forms: defines
// mnemonic##_S_2OPIMM here and reuses DEFINE_TEST_NEON_2OPIMM_SCALAR_D for the
// D form.
2128 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
2129 SIMTEST(mnemonic##_S_2OPIMM) { \
2130 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, \
2131 kInput32bitsImm##input_imm); \
2132 } \
2133 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)
2134
// Scalar floating-point operand-plus-immediate test for the D form: defines
// mnemonic##_D_2OPIMM using kInputDouble##input and the
// kInputDoubleImm##input_imm immediate list.
2135 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
2136 SIMTEST(mnemonic##_D_2OPIMM) { \
2137 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \
2138 kInputDoubleImm##input_imm); \
2139 }
2140
// Scalar floating-point operand-plus-immediate tests for S and D: defines
// mnemonic##_S_2OPIMM (kInputFloat##input with a kInputDoubleImm immediate
// list) and reuses DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D for the D form.
2141 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
2142 SIMTEST(mnemonic##_S_2OPIMM) { \
2143 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, \
2144 kInputDoubleImm##input_imm); \
2145 } \
2146 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)
2147
// Scalar operand-plus-immediate tests for all four scalar forms: defines the
// B and H cases here and reuses DEFINE_TEST_NEON_2OPIMM_SCALAR_SD for S and D.
2148 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
2149 SIMTEST(mnemonic##_B_2OPIMM) { \
2150 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, \
2151 kInput8bitsImm##input_imm); \
2152 } \
2153 SIMTEST(mnemonic##_H_2OPIMM) { \
2154 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, \
2155 kInput16bitsImm##input_imm); \
2156 } \
2157 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)
2158
// Long (widening) operand-plus-immediate tests: mnemonic##_8H/_4S/_2D_2OPIMM
// with 8B/4H/2S sources, and the same names for `mnemonic##2` with
// 16B/8H/4S sources. Both the source list and the immediate list follow the
// source (narrow) lane width.
2159 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
2160 SIMTEST(mnemonic##_8H_2OPIMM) { \
2161 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, \
2162 kInput8bitsImm##input_imm); \
2163 } \
2164 SIMTEST(mnemonic##_4S_2OPIMM) { \
2165 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, \
2166 kInput16bitsImm##input_imm); \
2167 } \
2168 SIMTEST(mnemonic##_2D_2OPIMM) { \
2169 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, \
2170 kInput32bitsImm##input_imm); \
2171 } \
2172 SIMTEST(mnemonic##2_8H_2OPIMM) { \
2173 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, \
2174 kInput8bitsImm##input_imm); \
2175 } \
2176 SIMTEST(mnemonic##2_4S_2OPIMM) { \
2177 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, \
2178 kInput16bitsImm##input_imm); \
2179 } \
2180 SIMTEST(mnemonic##2_2D_2OPIMM) { \
2181 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, \
2182 kInput32bitsImm##input_imm); \
2183 }
2184
// Upper-case alias used by the DEFINE_TEST_NEON_*BYELEMENT* macros: forwards
// all eight arguments unchanged to CALL_TEST_NEON_HELPER_ByElement inside a
// braced block.
2185 #define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, vdform, vnform, vmform, \
2186 input_d, input_n, input_m, indices) \
2187 { \
2188 CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, input_d, \
2189 input_n, input_m, indices); \
2190 }
2191
// By-element tests where destination and first source share a vector form and
// the second source is a scalar lane form. Defines mnemonic##_4H_4H_H,
// _8H_8H_H (kInputHIndices) and _2S_2S_S, _4S_4S_S (kInputSIndices). The
// three input lists are kInput{16,32}bits##input_d/n/m.
2192 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
2193 SIMTEST(mnemonic##_4H_4H_H) { \
2194 CALL_TEST_NEON_HELPER_BYELEMENT( \
2195 mnemonic, 4H, 4H, H, kInput16bits##input_d, kInput16bits##input_n, \
2196 kInput16bits##input_m, kInputHIndices); \
2197 } \
2198 SIMTEST(mnemonic##_8H_8H_H) { \
2199 CALL_TEST_NEON_HELPER_BYELEMENT( \
2200 mnemonic, 8H, 8H, H, kInput16bits##input_d, kInput16bits##input_n, \
2201 kInput16bits##input_m, kInputHIndices); \
2202 } \
2203 SIMTEST(mnemonic##_2S_2S_S) { \
2204 CALL_TEST_NEON_HELPER_BYELEMENT( \
2205 mnemonic, 2S, 2S, S, kInput32bits##input_d, kInput32bits##input_n, \
2206 kInput32bits##input_m, kInputSIndices); \
2207 } \
2208 SIMTEST(mnemonic##_4S_4S_S) { \
2209 CALL_TEST_NEON_HELPER_BYELEMENT( \
2210 mnemonic, 4S, 4S, S, kInput32bits##input_d, kInput32bits##input_n, \
2211 kInput32bits##input_m, kInputSIndices); \
2212 }
2213
// Scalar by-element tests: defines mnemonic##_H_H_H (16-bit lists,
// kInputHIndices) and mnemonic##_S_S_S (32-bit lists, kInputSIndices).
2214 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
2215 SIMTEST(mnemonic##_H_H_H) { \
2216 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInput16bits##input_d, \
2217 kInput16bits##input_n, \
2218 kInput16bits##input_m, kInputHIndices); \
2219 } \
2220 SIMTEST(mnemonic##_S_S_S) { \
2221 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInput32bits##input_d, \
2222 kInput32bits##input_n, \
2223 kInput32bits##input_m, kInputSIndices); \
2224 }
2225
// Floating-point by-element tests: defines mnemonic##_2S_2S_S and _4S_4S_S
// (Float lists, kInputSIndices) and mnemonic##_2D_2D_D (Double lists,
// kInputDIndices).
2226 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
2227 SIMTEST(mnemonic##_2S_2S_S) { \
2228 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInputFloat##input_d, \
2229 kInputFloat##input_n, \
2230 kInputFloat##input_m, kInputSIndices); \
2231 } \
2232 SIMTEST(mnemonic##_4S_4S_S) { \
2233 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInputFloat##input_d, \
2234 kInputFloat##input_n, \
2235 kInputFloat##input_m, kInputSIndices); \
2236 } \
2237 SIMTEST(mnemonic##_2D_2D_D) { \
2238 CALL_TEST_NEON_HELPER_BYELEMENT( \
2239 mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, \
2240 kInputDouble##input_m, kInputDIndices); \
2241 }
2242
// Scalar floating-point by-element tests: defines mnemonic##_S_S_S (Float
// lists, kInputSIndices) and mnemonic##_D_D_D (Double lists, kInputDIndices).
2243 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
2244 SIMTEST(mnemonic##_S_S_S) { \
2245 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInputFloat##inp_d, \
2246 kInputFloat##inp_n, kInputFloat##inp_m, \
2247 kInputSIndices); \
2248 } \
2249 SIMTEST(mnemonic##_D_D_D) { \
2250 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, kInputDouble##inp_d, \
2251 kInputDouble##inp_n, kInputDouble##inp_m, \
2252 kInputDIndices); \
2253 }
2254
// By-element tests whose destination lanes are twice as wide as the source
// lanes. Defines mnemonic##_4S_4H_H and mnemonic##2_4S_8H_H (kInputHIndices),
// plus mnemonic##_2D_2S_S and mnemonic##2_2D_4S_S (kInputSIndices). The
// destination list uses the wide width; both source lists use the narrow one.
2255 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
2256 SIMTEST(mnemonic##_4S_4H_H) { \
2257 CALL_TEST_NEON_HELPER_BYELEMENT( \
2258 mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, \
2259 kInput16bits##input_m, kInputHIndices); \
2260 } \
2261 SIMTEST(mnemonic##2_4S_8H_H) { \
2262 CALL_TEST_NEON_HELPER_BYELEMENT( \
2263 mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, \
2264 kInput16bits##input_m, kInputHIndices); \
2265 } \
2266 SIMTEST(mnemonic##_2D_2S_S) { \
2267 CALL_TEST_NEON_HELPER_BYELEMENT( \
2268 mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, \
2269 kInput32bits##input_m, kInputSIndices); \
2270 } \
2271 SIMTEST(mnemonic##2_2D_4S_S) { \
2272 CALL_TEST_NEON_HELPER_BYELEMENT( \
2273 mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, \
2274 kInput32bits##input_m, kInputSIndices); \
2275 }
2276
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT_DIFF: defines
// mnemonic##_S_H_H (32-bit destination list, 16-bit source lists,
// kInputHIndices) and mnemonic##_D_S_S (64-bit destination list, 32-bit
// source lists, kInputSIndices).
2277 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, \
2278 input_m) \
2279 SIMTEST(mnemonic##_S_H_H) { \
2280 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, kInput32bits##input_d, \
2281 kInput16bits##input_n, \
2282 kInput16bits##input_m, kInputHIndices); \
2283 } \
2284 SIMTEST(mnemonic##_D_S_S) { \
2285 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, kInput64bits##input_d, \
2286 kInput32bits##input_n, \
2287 kInput32bits##input_m, kInputSIndices); \
2288 }
2289
// Wrapper for two-operand, two-immediate tests. Forwards to
// CALL_TEST_NEON_HELPER_OpImmOpImm, passing a pointer to the MacroAssembler
// member named `mnemonic` as well as the bare mnemonic token, and using
// `variant` for both form arguments. Inputs and immediates are forwarded in
// the order (input_d, input_imm1, input_n, input_imm2).
2290 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant, input_d, input_imm1, \
2291 input_n, input_imm2) \
2292 { \
2293 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, \
2294 variant, variant, input_d, input_imm1, \
2295 input_n, input_imm2); \
2296 }
2297
// Defines four two-operand, two-immediate tests named mnemonic##_B, _H, _S
// and _D. Although the names carry a scalar-style suffix, the variants passed
// to the helper are the full-width vector forms 16B, 8H, 4S and 2D. Each case
// uses the input and immediate lists of the matching lane width.
2298 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, \
2299 input_imm2) \
2300 SIMTEST(mnemonic##_B) { \
2301 CALL_TEST_NEON_HELPER_2OP2IMM( \
2302 mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, \
2303 kInput8bits##input_n, kInput8bitsImm##input_imm2); \
2304 } \
2305 SIMTEST(mnemonic##_H) { \
2306 CALL_TEST_NEON_HELPER_2OP2IMM( \
2307 mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, \
2308 kInput16bits##input_n, kInput16bitsImm##input_imm2); \
2309 } \
2310 SIMTEST(mnemonic##_S) { \
2311 CALL_TEST_NEON_HELPER_2OP2IMM( \
2312 mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, \
2313 kInput32bits##input_n, kInput32bitsImm##input_imm2); \
2314 } \
2315 SIMTEST(mnemonic##_D) { \
2316 CALL_TEST_NEON_HELPER_2OP2IMM( \
2317 mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, \
2318 kInput64bits##input_n, kInput64bitsImm##input_imm2); \
2319 }
2320
2321 // Advanced SIMD copy.
// `ins` gets the _B/_H/_S/_D tests from DEFINE_TEST_NEON_2OP2IMM; `dup` gets
// the seven vector-form _2OPIMM tests from DEFINE_TEST_NEON_2OPIMM_COPY. Both
// use the Basic inputs with LaneCountFromZero immediates.
2322 DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic,
2323 LaneCountFromZero)
2324 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)
2325
2326 // Advanced SIMD scalar copy.
// Adds the dup_B/_H/_S/_D_2OPIMM scalar tests; these names do not clash with
// the vector-form dup tests because the form suffix differs.
2327 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)
2328
2329 // Advanced SIMD three same.
// One line per mnemonic, always with the Basic input lists. The macro chosen
// decides which vector forms are generated: _3SAME covers every integer form,
// _NO2D omits 2D, _HS covers only 4H/8H/2S/4S, _8B_16B covers only the byte
// forms, and _FP covers 2S/4S/2D with floating-point inputs.
2330 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)
2331 DEFINE_TEST_NEON_3SAME(sqadd, Basic)
2332 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)
2333 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)
2334 DEFINE_TEST_NEON_3SAME(sqsub, Basic)
2335 DEFINE_TEST_NEON_3SAME(cmgt, Basic)
2336 DEFINE_TEST_NEON_3SAME(cmge, Basic)
2337 DEFINE_TEST_NEON_3SAME(sshl, Basic)
2338 DEFINE_TEST_NEON_3SAME(sqshl, Basic)
2339 DEFINE_TEST_NEON_3SAME(srshl, Basic)
2340 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)
2341 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)
2342 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)
2343 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)
2344 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)
2345 DEFINE_TEST_NEON_3SAME(add, Basic)
2346 DEFINE_TEST_NEON_3SAME(cmtst, Basic)
2347 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)
2348 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)
2349 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)
2350 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)
2351 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)
2352 DEFINE_TEST_NEON_3SAME(addp, Basic)
2353 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)
2354 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)
2355 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)
2356 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)
2357 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)
2358 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)
2359 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)
2360 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)
2361 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)
2362 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)
2363 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)
2364 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)
2365 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)
2366 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)
2367 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)
2368 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)
2369 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)
2370 DEFINE_TEST_NEON_3SAME(uqadd, Basic)
2371 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)
2372 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)
2373 DEFINE_TEST_NEON_3SAME(uqsub, Basic)
2374 DEFINE_TEST_NEON_3SAME(cmhi, Basic)
2375 DEFINE_TEST_NEON_3SAME(cmhs, Basic)
2376 DEFINE_TEST_NEON_3SAME(ushl, Basic)
2377 DEFINE_TEST_NEON_3SAME(uqshl, Basic)
2378 DEFINE_TEST_NEON_3SAME(urshl, Basic)
2379 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)
2380 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)
2381 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)
2382 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)
2383 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)
2384 DEFINE_TEST_NEON_3SAME(sub, Basic)
2385 DEFINE_TEST_NEON_3SAME(cmeq, Basic)
2386 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)
2387 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)
2388 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)
2389 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)
2390 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)
2391 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)
2392 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)
2393 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)
2394 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)
2395 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)
2396 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)
2397 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)
2398 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)
2399 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)
2400 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)
2401 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)
2402 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)
2403 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)
2404 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)
2405 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)
2406 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)
2407
2408 // Advanced SIMD scalar three same.
// One line per mnemonic with the Basic input lists. _SCALAR generates the
// B/H/S/D tests, _SCALAR_D only D, _SCALAR_HS only H and S, and _FP_SCALAR
// the S and D floating-point tests.
2409 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)
2410 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)
2411 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)
2412 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)
2413 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)
2414 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)
2415 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)
2416 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)
2417 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)
2418 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)
2419 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)
2420 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)
2421 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)
2422 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)
2423 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)
2424 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)
2425 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)
2426 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)
2427 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)
2428 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)
2429 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)
2430 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)
2431 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)
2432 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)
2433 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)
2434 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)
2435 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)
2436 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)
2437 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)
2438 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)
2439 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)
2440
2441 // Advanced SIMD three different.
// One line per mnemonic with the Basic input lists. _LONG generates the
// 8H/4S/2D destinations (plus the "2" variants), _LONG_SD omits 8H, _LONG_8H
// generates only 8H, and _WIDE / _NARROW generate the wide and narrowing
// operand combinations.
2442 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)
2443 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)
2444 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)
2445 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)
2446 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)
2447 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)
2448 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)
2449 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)
2450 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)
2451 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)
2452 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)
2453 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)
2454 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)
2455 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)
2456 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)
2457 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)
2458 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)
2459 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)
2460 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)
2461 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)
2462 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)
2463 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)
2464 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)
2465 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)
2466 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)
2467 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)
2468
2469 // Advanced SIMD scalar three different.
// Each line generates the <mnemonic>_S (H sources) and <mnemonic>_D
// (S sources) scalar long tests with the Basic input lists.
2470 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)
2471 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)
2472 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)
2473
2474 // Advanced SIMD scalar pairwise.
// addp is written out by hand because its only scalar pairwise test pairs the
// D form with the 2D form; the test is named addp_SCALAR rather than following
// the mnemonic##_<form> pattern used by the DEFINE_TEST_NEON_* macros.
2475 SIMTEST(addp_SCALAR) {
2476 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);
2477 }
// The floating-point pairwise mnemonics get both the S/2S and D/2D tests.
2478 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)
2479 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)
2480 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)
2481 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)
2482 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)
2483
2484 // Advanced SIMD shift by immediate.
// One line per mnemonic. The last argument names the immediate list family
// (TypeWidth, TypeWidthFromZero or TypeWidthFromZeroToWidth) that is pasted
// onto kInput<N>bitsImm. The fixed-point conversions (scvtf/ucvtf,
// fcvtzs/fcvtzu) use their own FixedPointConversions / Conversions input
// lists instead of Basic.
// NOTE(review): DEFINE_TEST_NEON_2OPIMM_FP hard-codes kInputFloatBasic for its
// 2S case, so fcvtzs_2S/fcvtzu_2S do not use the Conversions inputs -- confirm
// this is intended.
2485 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)
2486 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)
2487 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)
2488 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)
2489 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)
2490 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)
2491 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)
2492 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)
2493 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)
2494 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)
2495 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)
2496 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions,
2497 TypeWidthFromZeroToWidth)
2498 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
2499 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)
2500 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)
2501 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)
2502 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)
2503 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)
2504 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)
2505 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)
2506 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)
2507 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)
2508 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)
2509 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)
2510 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)
2511 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)
2512 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions,
2513 TypeWidthFromZeroToWidth)
2514 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
2515
2516 // Advanced SIMD scalar shift by immediate.
// The last macro argument follows the same pattern as in the vector list:
// TypeWidth for the right-shift style entries, TypeWidthFromZero for the
// left-shift style ones and TypeWidthFromZeroToWidth for the fixed-point
// conversions.
2517 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)
2518 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)
2519 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)
2520 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)
2521 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)
2522 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)
2523 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)
2524 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)
2525 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions,
2526 TypeWidthFromZeroToWidth)
2527 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)
2528 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)
2529 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)
2530 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)
2531 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)
2532 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)
2533 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)
2534 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)
2535 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)
2536 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)
2537 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)
2538 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)
2539 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)
2540 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions,
2541 TypeWidthFromZeroToWidth)
2542 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)
2543
2544 // Advanced SIMD two-register miscellaneous.
// The comments interleaved with the entries below mark instructions that get
// no entry of their own in this list, together with the reason.
2545 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)
2546 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)
2547 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)
2548 DEFINE_TEST_NEON_2SAME(suqadd, Basic)
2549 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)
2550 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)
2551 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)
2552 DEFINE_TEST_NEON_2SAME(sqabs, Basic)
2553 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)
2554 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)
2555 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)
2556 DEFINE_TEST_NEON_2SAME(abs, Basic)
2557 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)
2558 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)
2559 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)
2560 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)
2561 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)
2562 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)
2563 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)
2564 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)
2565 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)
2566 // SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
2567 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)
2568 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)
2569 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)
2570 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)
2571 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)
2572 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)
2573 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)
2574 // FCVTZS (vector, integer) covered by FCVTZS (vector, fixed point) with fbits 0.
2575 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)
2576 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)
2577 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)
2578 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)
2579 DEFINE_TEST_NEON_2SAME(usqadd, Basic)
2580 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)
2581 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)
2582 DEFINE_TEST_NEON_2SAME(sqneg, Basic)
2583 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)
2584 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)
2585 DEFINE_TEST_NEON_2SAME(neg, Basic)
2586 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)
2587 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)
2588 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)
2589 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)
2590 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)
2591 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)
2592 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)
2593 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)
2594 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)
2595 // UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
// NOTE(review): the trailing underscore in not_ is presumably there because
// `not` is an alternative operator token in C++ - confirm against the
// assembler's method name.
2596 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)
2597 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)
2598 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)
2599 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)
2600 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)
2601 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)
2602 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)
2603 // FCVTZU (vector, integer) covered by FCVTZU (vector, fixed point) with fbits 0.
2604 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)
2605 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)
2606 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)
2607
2608 // Advanced SIMD scalar two-register miscellaneous.
// Entries without FP in the generator name use the _SCALAR, _SCALAR_D and
// _SCALAR_NARROW variants; the others use _FP_SCALAR and _FP_SCALAR_SD.
2609 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)
2610 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)
2611 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)
2612 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)
2613 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)
2614 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)
2615 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)
2616 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)
2617 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)
2618 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)
2619 // SCVTF (vector, integer) covered by SCVTF (vector, fixed point) with fbits 0.
2620 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)
2621 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)
2622 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)
2623 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)
2624 // FCVTZS (vector, integer) covered by FCVTZS (vector, fixed point) with fbits 0.
2625 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)
2626 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)
2627 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)
2628 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)
2629 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)
2630 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)
2631 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)
2632 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)
2633 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)
// Written out by hand instead of through a DEFINE_TEST_NEON_* generator: the
// 2DIFF helper is invoked directly with the formats S and D and with
// kInputDoubleConversions. NOTE(review): presumably S is the destination
// format and D the source format - confirm against
// CALL_TEST_NEON_HELPER_2DIFF.
2634 SIMTEST(fcvtxn_SCALAR) {
2635 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);
2636 }
// Remaining scalar two-register miscellaneous entries, continuing after the
// hand-written fcvtxn_SCALAR test.
2637 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)
2638 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)
2639 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)
2640 // UCVTF (vector, integer) covered by UCVTF (vector, fixed point) with fbits 0.
2641 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)
2642 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)
2643 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)
2644 // FCVTZU (vector, integer) covered by FCVTZU (vector, fixed point) with fbits 0.
2645 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)
2646
2647 // Advanced SIMD across lanes.
// saddlv and uaddlv go through the _ACROSS_LONG generator and the f-prefixed
// entries through _ACROSS_FP; the remaining entries use plain _ACROSS.
2648 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)
2649 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)
2650 DEFINE_TEST_NEON_ACROSS(sminv, Basic)
2651 DEFINE_TEST_NEON_ACROSS(addv, Basic)
2652 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)
2653 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)
2654 DEFINE_TEST_NEON_ACROSS(uminv, Basic)
2655 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)
2656 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)
2657 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)
2658 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)
2659
2660 // Advanced SIMD permute.
// No permute-specific generator is used here: all six entries go through
// DEFINE_TEST_NEON_3SAME with the Basic argument.
2661 DEFINE_TEST_NEON_3SAME(uzp1, Basic)
2662 DEFINE_TEST_NEON_3SAME(trn1, Basic)
2663 DEFINE_TEST_NEON_3SAME(zip1, Basic)
2664 DEFINE_TEST_NEON_3SAME(uzp2, Basic)
2665 DEFINE_TEST_NEON_3SAME(trn2, Basic)
2666 DEFINE_TEST_NEON_3SAME(zip2, Basic)
2667
2668 // Advanced SIMD vector x indexed element.
// Every entry passes three Basic arguments after the mnemonic.
// NOTE(review): presumably one input selection per operand (destination,
// vector source, indexed source) - confirm against the BYELEMENT generator
// macros.
2669 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)
2670 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)
2671 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)
2672 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)
2673 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)
2674 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)
2675 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)
2676 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)
2677 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)
2678 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)
2679 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)
2680 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)
2681 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)
2682 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)
2683 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)
2684 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)
2685 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)
2686 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)
2687
2688 // Advanced SIMD scalar x indexed element.
// Scalar by-element entries; as in the vector list, each one passes three
// Basic arguments after the mnemonic.
2689 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)
2690 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)
2691 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)
2692 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)
2693 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)
2694 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)
2695 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)
2696 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)
2697 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698