OLD | NEW |
| (Empty) |
1 // Copyright 2016 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include <stdio.h> | |
6 #include <stdlib.h> | |
7 #include <string.h> | |
8 #include <cmath> | |
9 #include <limits> | |
10 | |
11 #include "src/arm64/decoder-arm64-inl.h" | |
12 #include "src/arm64/disasm-arm64.h" | |
13 #include "src/arm64/simulator-arm64.h" | |
14 #include "src/arm64/utils-arm64.h" | |
15 #include "src/base/platform/platform.h" | |
16 #include "src/base/utils/random-number-generator.h" | |
17 #include "src/macro-assembler-inl.h" | |
18 #include "test/cctest/cctest.h" | |
19 #include "test/cctest/test-simulator-neon-inputs-arm64.h" | |
20 #include "test/cctest/test-simulator-neon-traces-arm64.h" | |
21 #include "test/cctest/test-utils-arm64.h" | |
22 | |
23 using namespace v8::internal; | |
24 | |
25 // Test infrastructure. | |
26 // | |
27 // Tests are functions which accept no parameters and have no return values. | |
28 // The testing code should not perform an explicit return once completed. For | |
29 // example to test the mov immediate instruction a very simple test would be: | |
30 // | |
31 // SIMTEST(mov_x0_one) { | |
32 // SETUP(); | |
33 // | |
34 // START(); | |
35 // __ mov(x0, Operand(1)); | |
36 // END(); | |
37 // | |
38 // RUN(); | |
39 // | |
40 // CHECK_EQUAL_64(1, x0); | |
41 // | |
42 // TEARDOWN(); | |
43 // } | |
44 // | |
45 // Within a START ... END block all registers but sp can be modified. sp has to | |
46 // be explicitly saved/restored. The END() macro replaces the function return | |
47 // so it may appear multiple times in a test if the test has multiple exit | |
48 // points. | |
49 // | |
50 // Once the test has been run all integer and floating point registers as well | |
51 // as flags are accessible through a RegisterDump instance, see | |
52 // test-utils-arm64.h for more info on RegisterDump. | |
53 // | |
54 // We provide some helper asserts to handle common cases: | |
55 // | |
56 // CHECK_EQUAL_32(int32_t, int32_t) | |
57 // CHECK_EQUAL_FP32(float, float) | |
58 // CHECK_EQUAL_32(int32_t, W register) | |
59 // CHECK_EQUAL_FP32(float, S register) | |
60 // CHECK_EQUAL_64(int64_t, int64_t) | |
61 // CHECK_EQUAL_FP64(double, double) | |
62 // CHECK_EQUAL_64(int64_t, X register) | |
63 // CHECK_EQUAL_64(X register, X register) | |
64 // CHECK_EQUAL_FP64(double, D register) | |
65 // | |
66 // e.g. CHECK_EQUAL_FP64(0.5, d30); | |
67 // | |
68 // If more advanced computation is required before the assert then access the | |
69 // RegisterDump named core directly: | |
70 // | |
71 // CHECK_EQUAL_64(0x1234, core.xreg(0) & 0xffff); | |
72 | |
#if 0  // TODO(all): enable.
// Context shared by the tests in this file (currently compiled out).
static v8::Persistent<v8::Context> env;

// Creates the shared context on first use; later calls are no-ops.
static void InitializeVM() {
  if (!env.IsEmpty()) return;
  env = v8::Context::New();
}
#endif
82 | |
// `__ Foo(...)` expands to `masm.Foo(...)`, so that code emitted through the
// MacroAssembler declared by SETUP() reads like an assembly listing.
#define __ masm.
// Declares a cctest named SIM_<name>.
#define SIMTEST(name) TEST(SIM_##name)

// Default size, in bytes, of the buffer that receives the generated code.
#define BUF_SIZE 8192
#define SETUP() SETUP_SIZE(BUF_SIZE)

#define INIT_V8() CcTest::InitializeVM();

#ifdef USE_SIMULATOR

// Run tests with the simulator.
//
// Declares the locals used by every other macro in this family: 'isolate',
// 'scope', 'buf', 'masm', 'decoder', 'simulator' and 'core'. The isolate is
// checked before it is handed to HandleScope so that a null isolate is
// reported by the CHECK rather than by a crash inside the scope constructor.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != nullptr);                                   \
  HandleScope scope(isolate);                                  \
  byte* buf = new byte[buf_size];                              \
  MacroAssembler masm(isolate, buf, buf_size,                  \
                      v8::internal::CodeObjectRequired::kYes); \
  Decoder<DispatchingDecoderVisitor>* decoder =                \
      new Decoder<DispatchingDecoderVisitor>();                \
  Simulator simulator(decoder);                                \
  RegisterDump core;

// Reset the assembler and simulator, so that instructions can be generated,
// but don't actually emit any code. This can be used by tests that need to
// emit instructions at the start of the buffer. Note that START_AFTER_RESET
// must be called before any callee-saved register is modified, and before an
// END is encountered.
//
// Most tests should call START, rather than call RESET directly.
#define RESET() \
  __ Reset();   \
  simulator.ResetState();

// Emits the test prologue: selects csp as the stack pointer, saves the
// callee-saved registers and enables simulator tracing.
#define START_AFTER_RESET()      \
  __ SetStackPointer(csp);       \
  __ PushCalleeSavedRegisters(); \
  __ Debug("Start test.", __LINE__, TRACE_ENABLE | LOG_ALL);

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Executes the generated code, starting at the beginning of 'buf'.
#define RUN() simulator.RunFrom(reinterpret_cast<Instruction*>(buf))

// Emits the test epilogue: disables tracing, dumps the register state into
// 'core', restores the callee-saved registers, returns and finalizes the code.
#define END()                                               \
  __ Debug("End test.", __LINE__, TRACE_DISABLE | LOG_ALL); \
  core.Dump(&masm);                                         \
  __ PopCalleeSavedRegisters();                             \
  __ Ret();                                                 \
  __ GetCode(nullptr);

// Releases the code buffer allocated by SETUP_SIZE().
#define TEARDOWN() delete[] buf;

#else  // ifdef USE_SIMULATOR.
// Run the test on real hardware or models.
//
// Declares 'isolate', 'scope', 'actual_size', 'buf', 'masm' and 'core'. As in
// the simulator variant, the isolate is checked before its first use.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != nullptr);                                   \
  HandleScope scope(isolate);                                  \
  size_t actual_size;                                          \
  byte* buf = static_cast<byte*>(                              \
      v8::base::OS::Allocate(buf_size, &actual_size, true));   \
  MacroAssembler masm(isolate, buf, actual_size,               \
                      v8::internal::CodeObjectRequired::kYes); \
  RegisterDump core;

#define RESET()                                                \
  __ Reset();                                                  \
  /* Reset the machine state (like simulator.ResetState()). */ \
  __ Msr(NZCV, xzr);                                           \
  __ Msr(FPCR, xzr);

#define START_AFTER_RESET() \
  __ SetStackPointer(csp);  \
  __ PushCalleeSavedRegisters();

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Flushes the instruction cache for the generated code and then calls the
// start of 'buf' as a 'void (*)(void)' function.
#define RUN()                                                       \
  Assembler::FlushICache(isolate, buf, masm.SizeOfGeneratedCode()); \
  {                                                                 \
    void (*test_function)(void);                                    \
    memcpy(&test_function, &buf, sizeof(buf));                      \
    test_function();                                                \
  }

#define END()                   \
  core.Dump(&masm);             \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  __ GetCode(nullptr);

// Releases the code buffer allocated by SETUP_SIZE().
#define TEARDOWN() v8::base::OS::Free(buf, actual_size);

#endif  // ifdef USE_SIMULATOR.
181 | |
// Assertion helpers. All of them compare against the RegisterDump named
// 'core' that SETUP() declares, so they can only be used after RUN().

#define CHECK_EQUAL_NZCV(expected) CHECK(EqualNzcv(expected, core.flags_nzcv()))

// 'expected' is parenthesized so that the address-of operator applies to the
// whole argument expression.
#define CHECK_EQUAL_REGISTERS(expected) \
  CHECK(EqualRegisters(&(expected), &core))

#define CHECK_EQUAL_32(expected, result) \
  CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))

#define CHECK_EQUAL_FP32(expected, result) \
  CHECK(EqualFP32(expected, &core, result))

#define CHECK_EQUAL_64(expected, result) CHECK(Equal64(expected, &core, result))

#define CHECK_EQUAL_FP64(expected, result) \
  CHECK(EqualFP64(expected, &core, result))

// The literal pool size is only compared in DEBUG builds; elsewhere the macro
// expands to a no-op expression.
#ifdef DEBUG
#define CHECK_LITERAL_POOL_SIZE(expected) \
  CHECK((expected) == (__ LiteralPoolSize()))
#else
#define CHECK_LITERAL_POOL_SIZE(expected) ((void)0)
#endif
203 | |
204 // The maximum number of errors to report in detail for each test. | |
205 static const unsigned kErrorReportLimit = 8; | |
206 | |
207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd, | |
208 const VRegister& vn); | |
209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd, | |
210 const VRegister& vn, | |
211 const VRegister& vm); | |
212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd, | |
213 const VRegister& vn, | |
214 const VRegister& vm, | |
215 int vm_index); | |
216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)( | |
217 const VRegister& vd, int imm1, const VRegister& vn, int imm2); | |
218 | |
219 // This helps using the same typename for both the function pointer | |
220 // and the array of immediates passed to helper routines. | |
221 template <typename T> | |
222 class Test2OpImmediateNEONHelper_t { | |
223 public: | |
224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd, | |
225 const VRegister& vn, T imm); | |
226 }; | |
227 | |
228 namespace { | |
229 | |
// Returns how many hexadecimal digits are needed to print a value of the
// wider of the two template types (two digits per byte).
template <typename Ta, typename Tb>
unsigned MaxHexCharCount() {
  const size_t widest_bytes = (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta);
  const unsigned widest_bits = static_cast<unsigned>(widest_bytes) * 8;
  // Each hex digit encodes four bits.
  return widest_bits / 4;
}
237 | |
238 // ==== Tests for instructions of the form <INST> VReg, VReg. ==== | |
239 | |
240 void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, | |
241 unsigned inputs_n_length, uintptr_t results, | |
242 VectorFormat vd_form, VectorFormat vn_form) { | |
243 DCHECK_NE(vd_form, kFormatUndefined); | |
244 DCHECK_NE(vn_form, kFormatUndefined); | |
245 | |
246 SETUP(); | |
247 START(); | |
248 | |
249 // Roll up the loop to keep the code size down. | |
250 Label loop_n; | |
251 | |
252 Register out = x0; | |
253 Register inputs_n_base = x1; | |
254 Register inputs_n_last_16bytes = x3; | |
255 Register index_n = x5; | |
256 | |
257 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
258 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
259 | |
260 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
261 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
262 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
263 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
264 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
265 | |
266 // These will be either a D- or a Q-register form, with a single lane | |
267 // (for use in scalar load and store operations). | |
268 VRegister vd = VRegister::Create(0, vd_bits); | |
269 VRegister vn = v1.V16B(); | |
270 VRegister vntmp = v3.V16B(); | |
271 | |
272 // These will have the correct format for use when calling 'helper'. | |
273 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); | |
274 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
275 | |
276 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
277 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
278 | |
279 __ Mov(out, results); | |
280 | |
281 __ Mov(inputs_n_base, inputs_n); | |
282 __ Mov(inputs_n_last_16bytes, | |
283 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); | |
284 | |
285 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
286 | |
287 __ Mov(index_n, 0); | |
288 __ Bind(&loop_n); | |
289 | |
290 __ Ldr(vntmp_single, | |
291 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
292 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
293 | |
294 // Set the destination to zero. | |
295 | |
296 // TODO(all): Setting the destination to values other than zero might be a | |
297 // better test for instructions such as sqxtn2 which may leave parts of V | |
298 // registers unchanged. | |
299 __ Movi(vd.V16B(), 0); | |
300 | |
301 (masm.*helper)(vd_helper, vn_helper); | |
302 | |
303 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
304 | |
305 __ Add(index_n, index_n, 1); | |
306 __ Cmp(index_n, inputs_n_length); | |
307 __ B(lo, &loop_n); | |
308 | |
309 END(); | |
310 RUN(); | |
311 TEARDOWN(); | |
312 } | |
313 | |
314 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
315 // arrays of rawbit representation of input values. This ensures that | |
316 // exact bit comparisons can be performed. | |
317 template <typename Td, typename Tn> | |
318 void Test1OpNEON(const char* name, Test1OpNEONHelper_t helper, | |
319 const Tn inputs_n[], unsigned inputs_n_length, | |
320 const Td expected[], unsigned expected_length, | |
321 VectorFormat vd_form, VectorFormat vn_form) { | |
322 DCHECK_GT(inputs_n_length, 0U); | |
323 | |
324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
325 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
326 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
327 | |
328 const unsigned results_length = inputs_n_length; | |
329 std::vector<Td> results(results_length * vd_lane_count, 0); | |
330 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
331 | |
332 Test1OpNEON_Helper( | |
333 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
334 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form); | |
335 | |
336 // Check the results. | |
337 CHECK(expected_length == results_length); | |
338 unsigned error_count = 0; | |
339 unsigned d = 0; | |
340 const char* padding = " "; | |
341 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); | |
342 for (unsigned n = 0; n < inputs_n_length; n++, d++) { | |
343 bool error_in_vector = false; | |
344 | |
345 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
346 unsigned output_index = (n * vd_lane_count) + lane; | |
347 | |
348 if (results[output_index] != expected[output_index]) { | |
349 error_in_vector = true; | |
350 break; | |
351 } | |
352 } | |
353 | |
354 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
355 printf("%s\n", name); | |
356 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, | |
357 lane_len_in_hex + 1, padding); | |
358 | |
359 const unsigned first_index_n = | |
360 inputs_n_length - (16 / vn_lane_bytes) + n + 1; | |
361 | |
362 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); | |
363 lane++) { | |
364 unsigned output_index = (n * vd_lane_count) + lane; | |
365 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; | |
366 | |
367 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
368 " " | |
369 "| 0x%0*" PRIx64 "\n", | |
370 results[output_index] != expected[output_index] ? '*' : ' ', | |
371 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
372 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
373 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
374 } | |
375 } | |
376 } | |
377 DCHECK_EQ(d, expected_length); | |
378 if (error_count > kErrorReportLimit) { | |
379 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
380 } | |
381 DCHECK_EQ(error_count, 0U); | |
382 } | |
383 | |
384 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ==== | |
385 // where <V> is one of B, H, S or D registers. | |
386 // e.g. saddlv H1, v0.8B | |
387 | |
388 // TODO(all): Change tests to store all lanes of the resulting V register. | |
389 // Some tests store all 128 bits of the resulting V register to | |
390 // check the simulator's behaviour on the rest of the register. | |
391 // This is better than storing the affected lanes only. | |
392 // Change any tests such as the 'Across' template to do the same. | |
393 | |
394 void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, | |
395 unsigned inputs_n_length, uintptr_t results, | |
396 VectorFormat vd_form, VectorFormat vn_form) { | |
397 DCHECK_NE(vd_form, kFormatUndefined); | |
398 DCHECK_NE(vn_form, kFormatUndefined); | |
399 | |
400 SETUP(); | |
401 START(); | |
402 | |
403 // Roll up the loop to keep the code size down. | |
404 Label loop_n; | |
405 | |
406 Register out = x0; | |
407 Register inputs_n_base = x1; | |
408 Register inputs_n_last_vector = x3; | |
409 Register index_n = x5; | |
410 | |
411 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
412 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
413 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
414 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
415 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
416 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
417 | |
418 // Test destructive operations by (arbitrarily) using the same register for | |
419 // B and S lane sizes. | |
420 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize); | |
421 | |
422 // These will be either a D- or a Q-register form, with a single lane | |
423 // (for use in scalar load and store operations). | |
424 // Create two aliases for v8; the first is the destination for the tested | |
425 // instruction, the second, the whole Q register to check the results. | |
426 VRegister vd = VRegister::Create(0, vd_bits); | |
427 VRegister vdstr = VRegister::Create(0, kQRegSizeInBits); | |
428 | |
429 VRegister vn = VRegister::Create(1, vn_bits); | |
430 VRegister vntmp = VRegister::Create(3, vn_bits); | |
431 | |
432 // These will have the correct format for use when calling 'helper'. | |
433 VRegister vd_helper = VRegister::Create(0, vn_bits, vn_lane_count); | |
434 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
435 | |
436 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
437 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
438 | |
439 // Same registers for use in the 'ext' instructions. | |
440 VRegister vn_ext = (kDRegSizeInBits == vn_bits) ? vn.V8B() : vn.V16B(); | |
441 VRegister vntmp_ext = | |
442 (kDRegSizeInBits == vn_bits) ? vntmp.V8B() : vntmp.V16B(); | |
443 | |
444 __ Mov(out, results); | |
445 | |
446 __ Mov(inputs_n_base, inputs_n); | |
447 __ Mov(inputs_n_last_vector, | |
448 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); | |
449 | |
450 __ Ldr(vn, MemOperand(inputs_n_last_vector)); | |
451 | |
452 __ Mov(index_n, 0); | |
453 __ Bind(&loop_n); | |
454 | |
455 __ Ldr(vntmp_single, | |
456 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
457 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); | |
458 | |
459 if (destructive) { | |
460 __ Mov(vd_helper, vn_helper); | |
461 (masm.*helper)(vd, vd_helper); | |
462 } else { | |
463 (masm.*helper)(vd, vn_helper); | |
464 } | |
465 | |
466 __ Str(vdstr, MemOperand(out, kQRegSize, PostIndex)); | |
467 | |
468 __ Add(index_n, index_n, 1); | |
469 __ Cmp(index_n, inputs_n_length); | |
470 __ B(lo, &loop_n); | |
471 | |
472 END(); | |
473 RUN(); | |
474 TEARDOWN(); | |
475 } | |
476 | |
477 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
478 // arrays of rawbit representation of input values. This ensures that | |
479 // exact bit comparisons can be performed. | |
480 template <typename Td, typename Tn> | |
481 void Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper, | |
482 const Tn inputs_n[], unsigned inputs_n_length, | |
483 const Td expected[], unsigned expected_length, | |
484 VectorFormat vd_form, VectorFormat vn_form) { | |
485 DCHECK_GT(inputs_n_length, 0U); | |
486 | |
487 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
488 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form); | |
489 | |
490 const unsigned results_length = inputs_n_length; | |
491 std::vector<Td> results(results_length * vd_lanes_per_q, 0); | |
492 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
493 | |
494 Test1OpAcrossNEON_Helper( | |
495 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
496 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form); | |
497 | |
498 // Check the results. | |
499 DCHECK_EQ(expected_length, results_length); | |
500 unsigned error_count = 0; | |
501 unsigned d = 0; | |
502 const char* padding = " "; | |
503 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); | |
504 for (unsigned n = 0; n < inputs_n_length; n++, d++) { | |
505 bool error_in_vector = false; | |
506 | |
507 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
508 unsigned expected_index = (n * vd_lane_count) + lane; | |
509 unsigned results_index = (n * vd_lanes_per_q) + lane; | |
510 | |
511 if (results[results_index] != expected[expected_index]) { | |
512 error_in_vector = true; | |
513 break; | |
514 } | |
515 | |
516 // For across operations, the remaining lanes should be zero. | |
517 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) { | |
518 unsigned results_index = (n * vd_lanes_per_q) + lane; | |
519 if (results[results_index] != 0) { | |
520 error_in_vector = true; | |
521 break; | |
522 } | |
523 } | |
524 } | |
525 | |
526 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
527 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
528 | |
529 printf("%s\n", name); | |
530 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, | |
531 lane_len_in_hex + 1, padding); | |
532 | |
533 for (unsigned lane = 0; lane < vn_lane_count; lane++) { | |
534 unsigned results_index = | |
535 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); | |
536 unsigned input_index_n = | |
537 (inputs_n_length - vn_lane_count + n + 1 + lane) % inputs_n_length; | |
538 | |
539 Td expect = 0; | |
540 if ((vn_lane_count - 1) == lane) { | |
541 // This is the last lane to be printed, ie. the least-significant | |
542 // lane, so use the expected value; any other lane should be zero. | |
543 unsigned expected_index = n * vd_lane_count; | |
544 expect = expected[expected_index]; | |
545 } | |
546 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
547 results[results_index] != expect ? '*' : ' ', lane_len_in_hex, | |
548 static_cast<uint64_t>(inputs_n[input_index_n]), lane_len_in_hex, | |
549 static_cast<uint64_t>(results[results_index]), lane_len_in_hex, | |
550 static_cast<uint64_t>(expect)); | |
551 } | |
552 } | |
553 } | |
554 DCHECK_EQ(d, expected_length); | |
555 if (error_count > kErrorReportLimit) { | |
556 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
557 } | |
558 DCHECK_EQ(error_count, 0U); | |
559 } | |
560 | |
561 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ==== | |
562 | |
563 void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d, | |
564 uintptr_t inputs_n, unsigned inputs_n_length, | |
565 uintptr_t inputs_m, unsigned inputs_m_length, | |
566 uintptr_t results, VectorFormat vd_form, | |
567 VectorFormat vn_form, VectorFormat vm_form) { | |
568 DCHECK_NE(vd_form, kFormatUndefined); | |
569 DCHECK_NE(vn_form, kFormatUndefined); | |
570 DCHECK_NE(vm_form, kFormatUndefined); | |
571 | |
572 SETUP(); | |
573 START(); | |
574 | |
575 // Roll up the loop to keep the code size down. | |
576 Label loop_n, loop_m; | |
577 | |
578 Register out = x0; | |
579 Register inputs_n_base = x1; | |
580 Register inputs_m_base = x2; | |
581 Register inputs_d_base = x3; | |
582 Register inputs_n_last_16bytes = x4; | |
583 Register inputs_m_last_16bytes = x5; | |
584 Register index_n = x6; | |
585 Register index_m = x7; | |
586 | |
587 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
588 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
589 | |
590 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
591 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
592 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
593 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
594 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
595 | |
596 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); | |
597 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); | |
598 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); | |
599 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); | |
600 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); | |
601 | |
602 // Always load and store 128 bits regardless of the format. | |
603 VRegister vd = v0.V16B(); | |
604 VRegister vn = v1.V16B(); | |
605 VRegister vm = v2.V16B(); | |
606 VRegister vntmp = v3.V16B(); | |
607 VRegister vmtmp = v4.V16B(); | |
608 VRegister vres = v5.V16B(); | |
609 | |
610 // These will have the correct format for calling the 'helper'. | |
611 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
612 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); | |
613 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
614 | |
615 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
616 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
617 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); | |
618 | |
619 __ Mov(out, results); | |
620 | |
621 __ Mov(inputs_d_base, inputs_d); | |
622 | |
623 __ Mov(inputs_n_base, inputs_n); | |
624 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); | |
625 __ Mov(inputs_m_base, inputs_m); | |
626 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); | |
627 | |
628 __ Ldr(vd, MemOperand(inputs_d_base)); | |
629 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
630 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); | |
631 | |
632 __ Mov(index_n, 0); | |
633 __ Bind(&loop_n); | |
634 | |
635 __ Ldr(vntmp_single, | |
636 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
637 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
638 | |
639 __ Mov(index_m, 0); | |
640 __ Bind(&loop_m); | |
641 | |
642 __ Ldr(vmtmp_single, | |
643 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); | |
644 __ Ext(vm, vm, vmtmp, vm_lane_bytes); | |
645 | |
646 __ Mov(vres, vd); | |
647 | |
648 (masm.*helper)(vres_helper, vn_helper, vm_helper); | |
649 | |
650 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
651 | |
652 __ Add(index_m, index_m, 1); | |
653 __ Cmp(index_m, inputs_m_length); | |
654 __ B(lo, &loop_m); | |
655 | |
656 __ Add(index_n, index_n, 1); | |
657 __ Cmp(index_n, inputs_n_length); | |
658 __ B(lo, &loop_n); | |
659 | |
660 END(); | |
661 RUN(); | |
662 TEARDOWN(); | |
663 } | |
664 | |
665 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
666 // arrays of rawbit representation of input values. This ensures that | |
667 // exact bit comparisons can be performed. | |
668 template <typename Td, typename Tn, typename Tm> | |
669 void Test2OpNEON(const char* name, Test2OpNEONHelper_t helper, | |
670 const Td inputs_d[], const Tn inputs_n[], | |
671 unsigned inputs_n_length, const Tm inputs_m[], | |
672 unsigned inputs_m_length, const Td expected[], | |
673 unsigned expected_length, VectorFormat vd_form, | |
674 VectorFormat vn_form, VectorFormat vm_form) { | |
675 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); | |
676 | |
677 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); | |
678 | |
679 const unsigned results_length = inputs_n_length * inputs_m_length; | |
680 std::vector<Td> results(results_length * vd_lane_count); | |
681 const unsigned lane_len_in_hex = | |
682 static_cast<unsigned>(std::max(sizeof(Td), sizeof(Tm)) * 8) / 4; | |
683 | |
684 Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d), | |
685 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
686 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, | |
687 reinterpret_cast<uintptr_t>(results.data()), vd_form, | |
688 vn_form, vm_form); | |
689 | |
690 // Check the results. | |
691 CHECK(expected_length == results_length); | |
692 unsigned error_count = 0; | |
693 unsigned d = 0; | |
694 const char* padding = " "; | |
695 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
696 for (unsigned n = 0; n < inputs_n_length; n++) { | |
697 for (unsigned m = 0; m < inputs_m_length; m++, d++) { | |
698 bool error_in_vector = false; | |
699 | |
700 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
701 unsigned output_index = | |
702 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane; | |
703 | |
704 if (results[output_index] != expected[output_index]) { | |
705 error_in_vector = true; | |
706 break; | |
707 } | |
708 } | |
709 | |
710 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
711 printf("%s\n", name); | |
712 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n", | |
713 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, | |
714 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); | |
715 | |
716 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
717 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
718 (m * vd_lane_count) + lane; | |
719 unsigned input_index_n = | |
720 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
721 inputs_n_length; | |
722 unsigned input_index_m = | |
723 (inputs_m_length - vd_lane_count + m + 1 + lane) % | |
724 inputs_m_length; | |
725 | |
726 printf( | |
727 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
728 " " | |
729 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
730 results[output_index] != expected[output_index] ? '*' : ' ', | |
731 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
732 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
733 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), | |
734 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
735 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
736 } | |
737 } | |
738 } | |
739 } | |
740 DCHECK_EQ(d, expected_length); | |
741 if (error_count > kErrorReportLimit) { | |
742 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
743 } | |
744 DCHECK_EQ(error_count, 0U); | |
745 } | |
746 | |
747 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ==== | |
748 | |
749 void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, | |
750 uintptr_t inputs_d, uintptr_t inputs_n, | |
751 unsigned inputs_n_length, uintptr_t inputs_m, | |
752 unsigned inputs_m_length, const int indices[], | |
753 unsigned indices_length, uintptr_t results, | |
754 VectorFormat vd_form, VectorFormat vn_form, | |
755 VectorFormat vm_form) { | |
756 DCHECK_NE(vd_form, kFormatUndefined); | |
757 DCHECK_NE(vn_form, kFormatUndefined); | |
758 DCHECK_NE(vm_form, kFormatUndefined); | |
759 | |
760 SETUP(); | |
761 START(); | |
762 | |
763 // Roll up the loop to keep the code size down. | |
764 Label loop_n, loop_m; | |
765 | |
766 Register out = x0; | |
767 Register inputs_n_base = x1; | |
768 Register inputs_m_base = x2; | |
769 Register inputs_d_base = x3; | |
770 Register inputs_n_last_16bytes = x4; | |
771 Register inputs_m_last_16bytes = x5; | |
772 Register index_n = x6; | |
773 Register index_m = x7; | |
774 | |
775 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
776 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
777 | |
778 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
779 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
780 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
781 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
782 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
783 | |
784 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); | |
785 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); | |
786 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); | |
787 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); | |
788 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); | |
789 | |
790 // Always load and store 128 bits regardless of the format. | |
791 VRegister vd = v0.V16B(); | |
792 VRegister vn = v1.V16B(); | |
793 VRegister vm = v2.V16B(); | |
794 VRegister vntmp = v3.V16B(); | |
795 VRegister vmtmp = v4.V16B(); | |
796 VRegister vres = v5.V16B(); | |
797 | |
798 // These will have the correct format for calling the 'helper'. | |
799 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
800 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); | |
801 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
802 | |
803 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
804 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
805 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); | |
806 | |
807 __ Mov(out, results); | |
808 | |
809 __ Mov(inputs_d_base, inputs_d); | |
810 | |
811 __ Mov(inputs_n_base, inputs_n); | |
812 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); | |
813 __ Mov(inputs_m_base, inputs_m); | |
814 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); | |
815 | |
816 __ Ldr(vd, MemOperand(inputs_d_base)); | |
817 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
818 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); | |
819 | |
820 __ Mov(index_n, 0); | |
821 __ Bind(&loop_n); | |
822 | |
823 __ Ldr(vntmp_single, | |
824 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
825 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
826 | |
827 __ Mov(index_m, 0); | |
828 __ Bind(&loop_m); | |
829 | |
830 __ Ldr(vmtmp_single, | |
831 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); | |
832 __ Ext(vm, vm, vmtmp, vm_lane_bytes); | |
833 | |
834 __ Mov(vres, vd); | |
835 { | |
836 for (unsigned i = 0; i < indices_length; i++) { | |
837 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]); | |
838 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
839 } | |
840 } | |
841 | |
842 __ Add(index_m, index_m, 1); | |
843 __ Cmp(index_m, inputs_m_length); | |
844 __ B(lo, &loop_m); | |
845 | |
846 __ Add(index_n, index_n, 1); | |
847 __ Cmp(index_n, inputs_n_length); | |
848 __ B(lo, &loop_n); | |
849 | |
850 END(); | |
851 RUN(); | |
852 TEARDOWN(); | |
853 } | |
854 | |
855 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
856 // arrays of rawbit representation of input values. This ensures that | |
857 // exact bit comparisons can be performed. | |
858 template <typename Td, typename Tn, typename Tm> | |
859 void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper, | |
860 const Td inputs_d[], const Tn inputs_n[], | |
861 unsigned inputs_n_length, const Tm inputs_m[], | |
862 unsigned inputs_m_length, const int indices[], | |
863 unsigned indices_length, const Td expected[], | |
864 unsigned expected_length, VectorFormat vd_form, | |
865 VectorFormat vn_form, VectorFormat vm_form) { | |
866 DCHECK_GT(inputs_n_length, 0U); | |
867 DCHECK_GT(inputs_m_length, 0U); | |
868 DCHECK_GT(indices_length, 0U); | |
869 | |
870 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); | |
871 | |
872 const unsigned results_length = | |
873 inputs_n_length * inputs_m_length * indices_length; | |
874 std::vector<Td> results(results_length * vd_lane_count, 0); | |
875 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>(); | |
876 | |
877 TestByElementNEON_Helper( | |
878 helper, reinterpret_cast<uintptr_t>(inputs_d), | |
879 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
880 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, indices, | |
881 indices_length, reinterpret_cast<uintptr_t>(results.data()), vd_form, | |
882 vn_form, vm_form); | |
883 | |
884 // Check the results. | |
885 CHECK(expected_length == results_length); | |
886 unsigned error_count = 0; | |
887 unsigned d = 0; | |
888 const char* padding = " "; | |
889 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
890 for (unsigned n = 0; n < inputs_n_length; n++) { | |
891 for (unsigned m = 0; m < inputs_m_length; m++) { | |
892 for (unsigned index = 0; index < indices_length; index++, d++) { | |
893 bool error_in_vector = false; | |
894 | |
895 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
896 unsigned output_index = | |
897 (n * inputs_m_length * indices_length * vd_lane_count) + | |
898 (m * indices_length * vd_lane_count) + (index * vd_lane_count) + | |
899 lane; | |
900 | |
901 if (results[output_index] != expected[output_index]) { | |
902 error_in_vector = true; | |
903 break; | |
904 } | |
905 } | |
906 | |
907 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
908 printf("%s\n", name); | |
909 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n", | |
910 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, | |
911 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); | |
912 | |
913 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
914 unsigned output_index = | |
915 (n * inputs_m_length * indices_length * vd_lane_count) + | |
916 (m * indices_length * vd_lane_count) + (index * vd_lane_count) + | |
917 lane; | |
918 unsigned input_index_n = | |
919 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
920 inputs_n_length; | |
921 unsigned input_index_m = | |
922 (inputs_m_length - vd_lane_count + m + 1 + lane) % | |
923 inputs_m_length; | |
924 | |
925 printf( | |
926 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
927 " " | |
928 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
929 results[output_index] != expected[output_index] ? '*' : ' ', | |
930 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
931 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
932 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), | |
933 indices[index], lane_len_in_hex, | |
934 static_cast<uint64_t>(results[output_index]), lane_len_in_hex, | |
935 static_cast<uint64_t>(expected[output_index])); | |
936 } | |
937 } | |
938 } | |
939 } | |
940 } | |
941 DCHECK_EQ(d, expected_length); | |
942 if (error_count > kErrorReportLimit) { | |
943 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
944 } | |
945 CHECK(error_count == 0); | |
946 } | |
947 | |
948 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ==== | |
949 | |
950 template <typename Tm> | |
951 void Test2OpImmNEON_Helper( | |
952 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, | |
953 uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[], | |
954 unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, | |
955 VectorFormat vn_form) { | |
956 DCHECK(vd_form != kFormatUndefined && vn_form != kFormatUndefined); | |
957 | |
958 SETUP(); | |
959 START(); | |
960 | |
961 // Roll up the loop to keep the code size down. | |
962 Label loop_n; | |
963 | |
964 Register out = x0; | |
965 Register inputs_n_base = x1; | |
966 Register inputs_n_last_16bytes = x3; | |
967 Register index_n = x5; | |
968 | |
969 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
970 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
971 | |
972 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
973 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
974 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
975 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
976 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
977 | |
978 // These will be either a D- or a Q-register form, with a single lane | |
979 // (for use in scalar load and store operations). | |
980 VRegister vd = VRegister::Create(0, vd_bits); | |
981 VRegister vn = v1.V16B(); | |
982 VRegister vntmp = v3.V16B(); | |
983 | |
984 // These will have the correct format for use when calling 'helper'. | |
985 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); | |
986 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
987 | |
988 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
989 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
990 | |
991 __ Mov(out, results); | |
992 | |
993 __ Mov(inputs_n_base, inputs_n); | |
994 __ Mov(inputs_n_last_16bytes, | |
995 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); | |
996 | |
997 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
998 | |
999 __ Mov(index_n, 0); | |
1000 __ Bind(&loop_n); | |
1001 | |
1002 __ Ldr(vntmp_single, | |
1003 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
1004 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
1005 | |
1006 // Set the destination to zero for tests such as '[r]shrn2'. | |
1007 // TODO(all): Setting the destination to values other than zero might be a | |
1008 // better test for shift and accumulate instructions (srsra/ssra/usra/ursra). | |
1009 __ Movi(vd.V16B(), 0); | |
1010 | |
1011 { | |
1012 for (unsigned i = 0; i < inputs_m_length; i++) { | |
1013 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]); | |
1014 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
1015 } | |
1016 } | |
1017 | |
1018 __ Add(index_n, index_n, 1); | |
1019 __ Cmp(index_n, inputs_n_length); | |
1020 __ B(lo, &loop_n); | |
1021 | |
1022 END(); | |
1023 RUN(); | |
1024 TEARDOWN(); | |
1025 } | |
1026 | |
1027 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
1028 // arrays of rawbit representation of input values. This ensures that | |
1029 // exact bit comparisons can be performed. | |
1030 template <typename Td, typename Tn, typename Tm> | |
1031 void Test2OpImmNEON(const char* name, | |
1032 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, | |
1033 const Tn inputs_n[], unsigned inputs_n_length, | |
1034 const Tm inputs_m[], unsigned inputs_m_length, | |
1035 const Td expected[], unsigned expected_length, | |
1036 VectorFormat vd_form, VectorFormat vn_form) { | |
1037 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); | |
1038 | |
1039 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
1040 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
1041 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
1042 | |
1043 const unsigned results_length = inputs_n_length * inputs_m_length; | |
1044 std::vector<Td> results(results_length * vd_lane_count, 0); | |
1045 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
1046 | |
1047 Test2OpImmNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n), | |
1048 inputs_n_length, inputs_m, inputs_m_length, | |
1049 reinterpret_cast<uintptr_t>(results.data()), vd_form, | |
1050 vn_form); | |
1051 | |
1052 // Check the results. | |
1053 CHECK(expected_length == results_length); | |
1054 unsigned error_count = 0; | |
1055 unsigned d = 0; | |
1056 const char* padding = " "; | |
1057 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
1058 for (unsigned n = 0; n < inputs_n_length; n++) { | |
1059 for (unsigned m = 0; m < inputs_m_length; m++, d++) { | |
1060 bool error_in_vector = false; | |
1061 | |
1062 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1063 unsigned output_index = | |
1064 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane; | |
1065 | |
1066 if (results[output_index] != expected[output_index]) { | |
1067 error_in_vector = true; | |
1068 break; | |
1069 } | |
1070 } | |
1071 | |
1072 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
1073 printf("%s\n", name); | |
1074 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, | |
1075 padding, lane_len_in_hex, padding, lane_len_in_hex + 1, padding); | |
1076 | |
1077 const unsigned first_index_n = | |
1078 inputs_n_length - (16 / vn_lane_bytes) + n + 1; | |
1079 | |
1080 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); | |
1081 lane++) { | |
1082 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
1083 (m * vd_lane_count) + lane; | |
1084 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; | |
1085 unsigned input_index_m = m; | |
1086 | |
1087 printf( | |
1088 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
1089 " " | |
1090 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
1091 results[output_index] != expected[output_index] ? '*' : ' ', | |
1092 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
1093 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), | |
1094 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
1095 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
1096 } | |
1097 } | |
1098 } | |
1099 } | |
1100 DCHECK_EQ(d, expected_length); | |
1101 if (error_count > kErrorReportLimit) { | |
1102 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
1103 } | |
1104 CHECK(error_count == 0); | |
1105 } | |
1106 | |
1107 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ==== | |
1108 | |
1109 void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, | |
1110 uintptr_t inputs_d, const int inputs_imm1[], | |
1111 unsigned inputs_imm1_length, uintptr_t inputs_n, | |
1112 unsigned inputs_n_length, | |
1113 const int inputs_imm2[], | |
1114 unsigned inputs_imm2_length, uintptr_t results, | |
1115 VectorFormat vd_form, VectorFormat vn_form) { | |
1116 DCHECK_NE(vd_form, kFormatUndefined); | |
1117 DCHECK_NE(vn_form, kFormatUndefined); | |
1118 | |
1119 SETUP(); | |
1120 START(); | |
1121 | |
1122 // Roll up the loop to keep the code size down. | |
1123 Label loop_n; | |
1124 | |
1125 Register out = x0; | |
1126 Register inputs_d_base = x1; | |
1127 Register inputs_n_base = x2; | |
1128 Register inputs_n_last_vector = x4; | |
1129 Register index_n = x6; | |
1130 | |
1131 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
1132 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
1133 | |
1134 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
1135 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
1136 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
1137 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
1138 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
1139 | |
1140 // These will be either a D- or a Q-register form, with a single lane | |
1141 // (for use in scalar load and store operations). | |
1142 VRegister vd = VRegister::Create(0, vd_bits); | |
1143 VRegister vn = VRegister::Create(1, vn_bits); | |
1144 VRegister vntmp = VRegister::Create(4, vn_bits); | |
1145 VRegister vres = VRegister::Create(5, vn_bits); | |
1146 | |
1147 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
1148 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
1149 | |
1150 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
1151 VRegister vntmp_single = VRegister::Create(4, vn_lane_bits); | |
1152 | |
1153 // Same registers for use in the 'ext' instructions. | |
1154 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B(); | |
1155 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B(); | |
1156 | |
1157 __ Mov(out, results); | |
1158 | |
1159 __ Mov(inputs_d_base, inputs_d); | |
1160 | |
1161 __ Mov(inputs_n_base, inputs_n); | |
1162 __ Mov(inputs_n_last_vector, | |
1163 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); | |
1164 | |
1165 __ Ldr(vd, MemOperand(inputs_d_base)); | |
1166 | |
1167 __ Ldr(vn, MemOperand(inputs_n_last_vector)); | |
1168 | |
1169 __ Mov(index_n, 0); | |
1170 __ Bind(&loop_n); | |
1171 | |
1172 __ Ldr(vntmp_single, | |
1173 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
1174 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); | |
1175 | |
1176 for (unsigned i = 0; i < inputs_imm1_length; i++) { | |
1177 for (unsigned j = 0; j < inputs_imm2_length; j++) { | |
1178 __ Mov(vres, vd); | |
1179 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]); | |
1180 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
1181 } | |
1182 } | |
1183 | |
1184 __ Add(index_n, index_n, 1); | |
1185 __ Cmp(index_n, inputs_n_length); | |
1186 __ B(lo, &loop_n); | |
1187 | |
1188 END(); | |
1189 RUN(); | |
1190 TEARDOWN(); | |
1191 } | |
1192 | |
1193 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
1194 // arrays of rawbit representation of input values. This ensures that | |
1195 // exact bit comparisons can be performed. | |
1196 template <typename Td, typename Tn> | |
1197 void TestOpImmOpImmNEON(const char* name, | |
1198 TestOpImmOpImmVdUpdateNEONHelper_t helper, | |
1199 const Td inputs_d[], const int inputs_imm1[], | |
1200 unsigned inputs_imm1_length, const Tn inputs_n[], | |
1201 unsigned inputs_n_length, const int inputs_imm2[], | |
1202 unsigned inputs_imm2_length, const Td expected[], | |
1203 unsigned expected_length, VectorFormat vd_form, | |
1204 VectorFormat vn_form) { | |
1205 DCHECK_GT(inputs_n_length, 0U); | |
1206 DCHECK_GT(inputs_imm1_length, 0U); | |
1207 DCHECK_GT(inputs_imm2_length, 0U); | |
1208 | |
1209 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
1210 | |
1211 const unsigned results_length = | |
1212 inputs_n_length * inputs_imm1_length * inputs_imm2_length; | |
1213 | |
1214 std::vector<Td> results(results_length * vd_lane_count, 0); | |
1215 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
1216 | |
1217 TestOpImmOpImmNEON_Helper( | |
1218 helper, reinterpret_cast<uintptr_t>(inputs_d), inputs_imm1, | |
1219 inputs_imm1_length, reinterpret_cast<uintptr_t>(inputs_n), | |
1220 inputs_n_length, inputs_imm2, inputs_imm2_length, | |
1221 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form); | |
1222 | |
1223 // Check the results. | |
1224 CHECK(expected_length == results_length); | |
1225 unsigned error_count = 0; | |
1226 unsigned counted_length = 0; | |
1227 const char* padding = " "; | |
1228 DCHECK(strlen(padding) >= (lane_len_in_hex + 1)); | |
1229 for (unsigned n = 0; n < inputs_n_length; n++) { | |
1230 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) { | |
1231 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) { | |
1232 bool error_in_vector = false; | |
1233 | |
1234 counted_length++; | |
1235 | |
1236 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1237 unsigned output_index = | |
1238 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + | |
1239 (imm1 * inputs_imm2_length * vd_lane_count) + | |
1240 (imm2 * vd_lane_count) + lane; | |
1241 | |
1242 if (results[output_index] != expected[output_index]) { | |
1243 error_in_vector = true; | |
1244 break; | |
1245 } | |
1246 } | |
1247 | |
1248 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
1249 printf("%s\n", name); | |
1250 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", | |
1251 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, | |
1252 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, | |
1253 lane_len_in_hex + 1, padding); | |
1254 | |
1255 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1256 unsigned output_index = | |
1257 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + | |
1258 (imm1 * inputs_imm2_length * vd_lane_count) + | |
1259 (imm2 * vd_lane_count) + lane; | |
1260 unsigned input_index_n = | |
1261 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
1262 inputs_n_length; | |
1263 unsigned input_index_imm1 = imm1; | |
1264 unsigned input_index_imm2 = imm2; | |
1265 | |
1266 printf( | |
1267 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
1268 " " | |
1269 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
1270 results[output_index] != expected[output_index] ? '*' : ' ', | |
1271 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
1272 lane_len_in_hex, | |
1273 static_cast<uint64_t>(inputs_imm1[input_index_imm1]), | |
1274 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
1275 lane_len_in_hex, | |
1276 static_cast<uint64_t>(inputs_imm2[input_index_imm2]), | |
1277 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
1278 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
1279 } | |
1280 } | |
1281 } | |
1282 } | |
1283 } | |
1284 DCHECK_EQ(counted_length, expected_length); | |
1285 if (error_count > kErrorReportLimit) { | |
1286 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
1287 } | |
1288 CHECK(error_count == 0); | |
1289 } | |
1290 | |
1291 } // anonymous namespace | |
1292 | |
1293 // ==== NEON Tests. ==== | |
1294 | |
1295 // clang-format off | |
1296 | |
// Expands to a Test1OpNEON call for MacroAssembler::<insn>, named
// "<insn>_<vd_fmt>". Expected results are taken from
// kExpected_NEON_<insn>_<vd_fmt> / kExpectedCount_NEON_<insn>_<vd_fmt>.
// 'src_n' must be an array, since its length is computed with sizeof.
#define CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, src_n) \
  Test1OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
              &MacroAssembler::insn, \
              src_n, \
              (sizeof(src_n) / sizeof(src_n[0])), \
              kExpected_NEON_##insn##_##vd_fmt, \
              kExpectedCount_NEON_##insn##_##vd_fmt, \
              kFormat##vd_fmt, \
              kFormat##vn_fmt)
1304 | |
// Expands to a Test1OpAcrossNEON call for MacroAssembler::<insn>, named
// "<insn>_<vd_fmt>_<vn_fmt>". Expected results are taken from
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt> and the matching kExpectedCount_
// constant. 'src_n' must be an array, since its length is computed with
// sizeof.
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, src_n) \
  Test1OpAcrossNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt), \
      &MacroAssembler::insn, \
      src_n, \
      (sizeof(src_n) / sizeof(src_n[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt)
1313 | |
// Expands to a Test2OpNEON call for MacroAssembler::<insn>, named
// "<insn>_<vd_fmt>". Expected results are taken from
// kExpected_NEON_<insn>_<vd_fmt> / kExpectedCount_NEON_<insn>_<vd_fmt>.
// 'src_n' and 'src_m' must be arrays, since their lengths are computed with
// sizeof.
#define CALL_TEST_NEON_HELPER_2Op(insn, vd_fmt, vn_fmt, vm_fmt, src_d, \
                                  src_n, src_m) \
  Test2OpNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
              &MacroAssembler::insn, \
              src_d, \
              src_n, \
              (sizeof(src_n) / sizeof(src_n[0])), \
              src_m, \
              (sizeof(src_m) / sizeof(src_m[0])), \
              kExpected_NEON_##insn##_##vd_fmt, \
              kExpectedCount_NEON_##insn##_##vd_fmt, \
              kFormat##vd_fmt, \
              kFormat##vn_fmt, \
              kFormat##vm_fmt)
1323 | |
// Expands to a Test2OpImmNEON call for MacroAssembler::<insn>, named
// "<insn>_<vd_fmt>_2OPIMM". Expected results are taken from
// kExpected_NEON_<insn>_<vd_fmt>_2OPIMM and the matching kExpectedCount_
// constant. 'src_n' (register inputs) and 'src_m' (immediates) must be
// arrays, since their lengths are computed with sizeof.
#define CALL_TEST_NEON_HELPER_2OpImm(insn, vd_fmt, vn_fmt, src_n, src_m) \
  Test2OpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_2OPIMM", \
                 &MacroAssembler::insn, \
                 src_n, \
                 (sizeof(src_n) / sizeof(src_n[0])), \
                 src_m, \
                 (sizeof(src_m) / sizeof(src_m[0])), \
                 kExpected_NEON_##insn##_##vd_fmt##_2OPIMM, \
                 kExpectedCount_NEON_##insn##_##vd_fmt##_2OPIMM, \
                 kFormat##vd_fmt, \
                 kFormat##vn_fmt)
1333 | |
// Expands to a TestByElementNEON call for MacroAssembler::<insn>, named
// "<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt>". Expected results are taken from
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt> and the matching
// kExpectedCount_ constant. 'src_n', 'src_m' and 'idx' must be arrays, since
// their lengths are computed with sizeof.
#define CALL_TEST_NEON_HELPER_ByElement(insn, vd_fmt, vn_fmt, vm_fmt, src_d, \
                                        src_n, src_m, idx) \
  TestByElementNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY( \
          vn_fmt) "_" STRINGIFY(vm_fmt), \
      &MacroAssembler::insn, \
      src_d, \
      src_n, \
      (sizeof(src_n) / sizeof(src_n[0])), \
      src_m, \
      (sizeof(src_m) / sizeof(src_m[0])), \
      idx, \
      (sizeof(idx) / sizeof(idx[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kFormat##vd_fmt, \
      kFormat##vn_fmt, \
      kFormat##vm_fmt)
1346 | |
// Expands to a TestOpImmOpImmNEON call named "<insn>_<vd_fmt>". Unlike the
// other CALL_TEST_NEON_HELPER_* macros, the code-emitting function is passed
// explicitly as 'emit_fn'; 'insn' is only used for naming and for selecting
// kExpected_NEON_<insn>_<vd_fmt> / kExpectedCount_NEON_<insn>_<vd_fmt>.
// 'src_imm1', 'src_n' and 'src_imm2' must be arrays, since their lengths are
// computed with sizeof.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(emit_fn, insn, vd_fmt, vn_fmt, \
                                         src_d, src_imm1, src_n, src_imm2) \
  TestOpImmOpImmNEON(STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
                     emit_fn, \
                     src_d, \
                     src_imm1, \
                     (sizeof(src_imm1) / sizeof(src_imm1[0])), \
                     src_n, \
                     (sizeof(src_n) / sizeof(src_n[0])), \
                     src_imm2, \
                     (sizeof(src_imm2) / sizeof(src_imm2[0])), \
                     kExpected_NEON_##insn##_##vd_fmt, \
                     kExpectedCount_NEON_##insn##_##vd_fmt, \
                     kFormat##vd_fmt, \
                     kFormat##vn_fmt)
1358 | |
// One-operand test in which the destination and source share one format.
#define CALL_TEST_NEON_HELPER_2SAME(insn, fmt, src) \
  CALL_TEST_NEON_HELPER_1Op(insn, fmt, fmt, src)

// Defines SIMTESTs <insn>_8B and <insn>_16B, fed from kInput8bits<src>.
#define DEFINE_TEST_NEON_2SAME_8B_16B(insn, src) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8B, kInput8bits##src); \
  } \
  SIMTEST(insn##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 16B, kInput8bits##src); \
  }

// Defines SIMTESTs <insn>_4H and <insn>_8H, fed from kInput16bits<src>.
#define DEFINE_TEST_NEON_2SAME_4H_8H(insn, src) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4H, kInput16bits##src); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8H, kInput16bits##src); \
  }

// Defines SIMTESTs <insn>_2S and <insn>_4S, fed from kInput32bits<src>.
#define DEFINE_TEST_NEON_2SAME_2S_4S(insn, src) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInput32bits##src); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInput32bits##src); \
  }
1385 | |
// Same-format tests for the B and H arrangements (8B, 16B, 4H, 8H).
#define DEFINE_TEST_NEON_2SAME_BH(insn, src) \
  DEFINE_TEST_NEON_2SAME_8B_16B(insn, src) \
  DEFINE_TEST_NEON_2SAME_4H_8H(insn, src)

// Same-format tests for every arrangement except 2D.
#define DEFINE_TEST_NEON_2SAME_NO2D(insn, src) \
  DEFINE_TEST_NEON_2SAME_BH(insn, src) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, src)

// Same-format tests for all arrangements; 2D is fed from kInput64bits<src>.
#define DEFINE_TEST_NEON_2SAME(insn, src) \
  DEFINE_TEST_NEON_2SAME_NO2D(insn, src) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##src); \
  }
// Same-format tests for the S and D arrangements only (2S, 4S, 2D).
#define DEFINE_TEST_NEON_2SAME_SD(insn, src) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, src) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##src); \
  }
1404 | |
// Same-format vector tests fed from the floating-point input tables:
// 2S and 4S use kInputFloat<src>, 2D uses kInputDouble<src>.
#define DEFINE_TEST_NEON_2SAME_FP(insn, src) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInputFloat##src); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInputFloat##src); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInputDouble##src); \
  }

// Same-format scalar tests fed from the floating-point input tables:
// S uses kInputFloat<src>, D uses kInputDouble<src>.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(insn, src) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInputFloat##src); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInputDouble##src); \
  }
1423 | |
// Same-format scalar tests, one macro per lane size. Each defines a single
// SIMTEST fed from the kInput<N>bits<src> table of the matching width.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, src) \
  SIMTEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, B, kInput8bits##src); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, src) \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, H, kInput16bits##src); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, src) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInput32bits##src); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, src) \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInput64bits##src); \
  }

// Same-format scalar tests for all four lane sizes (B, H, S and D).
#define DEFINE_TEST_NEON_2SAME_SCALAR(insn, src) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, src) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, src) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, src) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, src)

// Same-format scalar tests for the S and D lane sizes only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(insn, src) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, src) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, src)
1450 | |
// Thin alias so that the across-lanes definers below read uniformly.
#define CALL_TEST_NEON_HELPER_ACROSS(insn, vd_fmt, vn_fmt, src_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, src_n)

// Across-lanes tests whose scalar result has the same lane size as the
// source vector. Test names are <insn>_<scalar>_<vector>.
#define DEFINE_TEST_NEON_ACROSS(insn, src) \
  SIMTEST(insn##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 8B, kInput8bits##src); \
  } \
  SIMTEST(insn##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 16B, kInput8bits##src); \
  } \
  SIMTEST(insn##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 4H, kInput16bits##src); \
  } \
  SIMTEST(insn##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8H, kInput16bits##src); \
  } \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInput32bits##src); \
  }

// Across-lanes tests whose scalar result is one lane size wider than the
// source vector's lanes.
#define DEFINE_TEST_NEON_ACROSS_LONG(insn, src) \
  SIMTEST(insn##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8B, kInput8bits##src); \
  } \
  SIMTEST(insn##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 16B, kInput8bits##src); \
  } \
  SIMTEST(insn##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4H, kInput16bits##src); \
  } \
  SIMTEST(insn##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 8H, kInput16bits##src); \
  } \
  SIMTEST(insn##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, D, 4S, kInput32bits##src); \
  }

// Across-lanes test fed from kInputFloat<src>; only the S <- 4S form is
// defined.
#define DEFINE_TEST_NEON_ACROSS_FP(insn, src) \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInputFloat##src); \
  }
1492 | |
// One-operand test in which the destination and source formats differ.
#define CALL_TEST_NEON_HELPER_2DIFF(insn, vd_fmt, vn_fmt, src_n) \
  CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, src_n)

// Tests in which each destination lane is twice as wide as a source lane.
// Test names use the destination format; the input table matches the source
// lane size.
#define DEFINE_TEST_NEON_2DIFF_LONG(insn, src) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 8B, kInput8bits##src); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8H, 16B, kInput8bits##src); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 4H, kInput16bits##src); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 8H, kInput16bits##src); \
  } \
  SIMTEST(insn##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 1D, 2S, kInput32bits##src); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 4S, kInput32bits##src); \
  }
1515 | |
// Tests in which each destination lane is half as wide as a source lane.
// Both <insn> and its "<insn>2" variant are covered; test names use the
// destination format and the input table matches the source lane size.
#define DEFINE_TEST_NEON_2DIFF_NARROW(insn, src) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8B, 8H, kInput16bits##src); \
  } \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInput32bits##src); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInput64bits##src); \
  } \
  SIMTEST(insn##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 16B, 8H, kInput16bits##src); \
  } \
  SIMTEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInput32bits##src); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInput64bits##src); \
  }
1535 | |
1536 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ | |
1537 SIMTEST(mnemonic##_4S) { \ | |
1538 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ | |
1539 } \ | |
1540 SIMTEST(mnemonic##_2D) { \ | |
1541 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ | |
1542 } \ | |
1543 SIMTEST(mnemonic##2_4S) { \ | |
1544 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \ | |
1545 } \ | |
1546 SIMTEST(mnemonic##2_2D) { \ | |
1547 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ | |
1548 } | |
1549 | |
// Defines four SIMTESTs: <mnemonic>_4H/_2S and <mnemonic>2_8H/_4S. The tests
// paired with 4S read kInputFloat##input; the tests paired with 2D read
// kInputDouble##input.
1550 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ | |
1551 SIMTEST(mnemonic##_4H) { \ | |
1552 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ | |
1553 } \ | |
1554 SIMTEST(mnemonic##_2S) { \ | |
1555 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ | |
1556 } \ | |
1557 SIMTEST(mnemonic##2_8H) { \ | |
1558 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ | |
1559 } \ | |
1560 SIMTEST(mnemonic##2_4S) { \ | |
1561 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ | |
1562 } | |
1563 | |
// Reduced form of DEFINE_TEST_NEON_2DIFF_FP_NARROW: defines only
// <mnemonic>_2S and <mnemonic>2_4S, both paired with 2D and reading
// kInputDouble##input.
1564 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ | |
1565 SIMTEST(mnemonic##_2S) { \ | |
1566 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ | |
1567 } \ | |
1568 SIMTEST(mnemonic##2_4S) { \ | |
1569 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ | |
1570 } | |
1571 | |
// Scalar counterpart of the 2DIFF narrow tests: defines <mnemonic>_B/_H/_S,
// pairing B with H, H with S and S with D, and reading the
// kInput16bits/32bits/64bits##input array for the wider form.
1572 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ | |
1573 SIMTEST(mnemonic##_B) { \ | |
1574 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ | |
1575 } \ | |
1576 SIMTEST(mnemonic##_H) { \ | |
1577 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ | |
1578 } \ | |
1579 SIMTEST(mnemonic##_S) { \ | |
1580 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ | |
1581 } | |
1582 | |
// Defines <mnemonic>_S (forms S and 2S, kInputFloat##input) and
// <mnemonic>_D (forms D and 2D, kInputDouble##input): a scalar form paired
// with a two-lane vector form of the same lane size.
1583 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ | |
1584 SIMTEST(mnemonic##_S) { \ | |
1585 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ | |
1586 } \ | |
1587 SIMTEST(mnemonic##_D) { \ | |
1588 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ | |
1589 } | |
1590 | |
// Three-operand helper for instructions whose operands all share one form:
// forwards to CALL_TEST_NEON_HELPER_2Op with `variant` used for all three
// register forms and `input_nm` used for both source inputs. `input_d` is
// passed through as the destination input.
1591 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \ | |
1592 { \ | |
1593 CALL_TEST_NEON_HELPER_2Op(mnemonic, variant, variant, variant, input_d, \ | |
1594 input_nm, input_nm); \ | |
1595 } | |
1596 | |
// Defines <mnemonic>_8B and <mnemonic>_16B via CALL_TEST_NEON_HELPER_3SAME,
// with kInput8bitsAccDestination as the destination input and
// kInput8bits##input as the source input.
1597 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ | |
1598 SIMTEST(mnemonic##_8B) { \ | |
1599 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, kInput8bitsAccDestination, \ | |
1600 kInput8bits##input); \ | |
1601 } \ | |
1602 SIMTEST(mnemonic##_16B) { \ | |
1603 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, kInput8bitsAccDestination, \ | |
1604 kInput8bits##input); \ | |
1605 } | |
1606 | |
// Defines <mnemonic>_4H/_8H/_2S/_4S via CALL_TEST_NEON_HELPER_3SAME. The H
// tests use the 16-bit destination/source arrays and the S tests use the
// 32-bit ones.
1607 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ | |
1608 SIMTEST(mnemonic##_4H) { \ | |
1609 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInput16bitsAccDestination, \ | |
1610 kInput16bits##input); \ | |
1611 } \ | |
1612 SIMTEST(mnemonic##_8H) { \ | |
1613 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInput16bitsAccDestination, \ | |
1614 kInput16bits##input); \ | |
1615 } \ | |
1616 SIMTEST(mnemonic##_2S) { \ | |
1617 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInput32bitsAccDestination, \ | |
1618 kInput32bits##input); \ | |
1619 } \ | |
1620 SIMTEST(mnemonic##_4S) { \ | |
1621 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInput32bitsAccDestination, \ | |
1622 kInput32bits##input); \ | |
1623 } | |
1624 | |
// Every integer three-same arrangement except 2D: combines the 8B/16B tests
// with the 4H/8H/2S/4S tests.
1625 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ | |
1626 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ | |
1627 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) | |
1628 | |
// Full integer three-same set: everything from DEFINE_TEST_NEON_3SAME_NO2D
// plus <mnemonic>_2D, which uses the 64-bit destination/source arrays.
1629 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ | |
1630 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ | |
1631 SIMTEST(mnemonic##_2D) { \ | |
1632 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInput64bitsAccDestination, \ | |
1633 kInput64bits##input); \ | |
1634 } | |
1635 | |
// Floating-point three-same set: defines <mnemonic>_2S/_4S using
// kInputFloatAccDestination and kInputFloat##input, and <mnemonic>_2D using
// kInputDoubleAccDestination and kInputDouble##input.
1636 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ | |
1637 SIMTEST(mnemonic##_2S) { \ | |
1638 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInputFloatAccDestination, \ | |
1639 kInputFloat##input); \ | |
1640 } \ | |
1641 SIMTEST(mnemonic##_4S) { \ | |
1642 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInputFloatAccDestination, \ | |
1643 kInputFloat##input); \ | |
1644 } \ | |
1645 SIMTEST(mnemonic##_2D) { \ | |
1646 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInputDoubleAccDestination, \ | |
1647 kInputDouble##input); \ | |
1648 } | |
1649 | |
// Defines only <mnemonic>_D: a three-same test on the scalar D form using the
// 64-bit destination/source arrays.
1650 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ | |
1651 SIMTEST(mnemonic##_D) { \ | |
1652 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \ | |
1653 kInput64bits##input); \ | |
1654 } | |
1655 | |
// Defines <mnemonic>_H and <mnemonic>_S: three-same tests on the scalar H and
// S forms using the 16-bit and 32-bit arrays respectively.
1656 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ | |
1657 SIMTEST(mnemonic##_H) { \ | |
1658 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \ | |
1659 kInput16bits##input); \ | |
1660 } \ | |
1661 SIMTEST(mnemonic##_S) { \ | |
1662 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \ | |
1663 kInput32bits##input); \ | |
1664 } | |
1665 | |
// Full scalar three-same set: defines <mnemonic>_B/_H/_S/_D, each using the
// destination/source arrays of the matching width (8, 16, 32, 64 bits).
// Note: the _H/_S and _D names coincide with those produced by
// DEFINE_TEST_NEON_3SAME_SCALAR_HS and DEFINE_TEST_NEON_3SAME_SCALAR_D.
1666 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ | |
1667 SIMTEST(mnemonic##_B) { \ | |
1668 CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, kInput8bitsAccDestination, \ | |
1669 kInput8bits##input); \ | |
1670 } \ | |
1671 SIMTEST(mnemonic##_H) { \ | |
1672 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \ | |
1673 kInput16bits##input); \ | |
1674 } \ | |
1675 SIMTEST(mnemonic##_S) { \ | |
1676 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \ | |
1677 kInput32bits##input); \ | |
1678 } \ | |
1679 SIMTEST(mnemonic##_D) { \ | |
1680 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \ | |
1681 kInput64bits##input); \ | |
1682 } | |
1683 | |
// Floating-point scalar three-same set: <mnemonic>_S uses the float arrays and
// <mnemonic>_D uses the double arrays.
1684 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ | |
1685 SIMTEST(mnemonic##_S) { \ | |
1686 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInputFloatAccDestination, \ | |
1687 kInputFloat##input); \ | |
1688 } \ | |
1689 SIMTEST(mnemonic##_D) { \ | |
1690 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInputDoubleAccDestination, \ | |
1691 kInputDouble##input); \ | |
1692 } | |
1693 | |
// Three-operand helper for instructions whose operands have different forms:
// a direct pass-through to CALL_TEST_NEON_HELPER_2Op with separate
// destination (vdform/input_d) and source (vnform/input_n, vmform/input_m)
// forms and inputs.
1694 #define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, vdform, vnform, vmform, input_d, \ | |
1695 input_n, input_m) \ | |
1696 { \ | |
1697 CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, input_d, \ | |
1698 input_n, input_m); \ | |
1699 } | |
1700 | |
// Defines <mnemonic>_8H (sources 8B) and <mnemonic>2_8H (sources 16B). The
// destination input is kInput16bitsAccDestination; both sources read
// kInput8bits##input.
1701 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ | |
1702 SIMTEST(mnemonic##_8H) { \ | |
1703 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \ | |
1704 kInput16bitsAccDestination, \ | |
1705 kInput8bits##input, kInput8bits##input); \ | |
1706 } \ | |
1707 SIMTEST(mnemonic##2_8H) { \ | |
1708 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \ | |
1709 kInput16bitsAccDestination, \ | |
1710 kInput8bits##input, kInput8bits##input); \ | |
1711 } | |
1712 | |
// Defines <mnemonic>_4S (sources 4H) and <mnemonic>2_4S (sources 8H). The
// destination input is kInput32bitsAccDestination; both sources read
// kInput16bits##input.
1713 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ | |
1714 SIMTEST(mnemonic##_4S) { \ | |
1715 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \ | |
1716 kInput32bitsAccDestination, \ | |
1717 kInput16bits##input, kInput16bits##input); \ | |
1718 } \ | |
1719 SIMTEST(mnemonic##2_4S) { \ | |
1720 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \ | |
1721 kInput32bitsAccDestination, \ | |
1722 kInput16bits##input, kInput16bits##input); \ | |
1723 } | |
1724 | |
// Defines <mnemonic>_2D (sources 2S) and <mnemonic>2_2D (sources 4S). The
// destination input is kInput64bitsAccDestination; both sources read
// kInput32bits##input.
1725 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ | |
1726 SIMTEST(mnemonic##_2D) { \ | |
1727 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \ | |
1728 kInput64bitsAccDestination, \ | |
1729 kInput32bits##input, kInput32bits##input); \ | |
1730 } \ | |
1731 SIMTEST(mnemonic##2_2D) { \ | |
1732 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \ | |
1733 kInput64bitsAccDestination, \ | |
1734 kInput32bits##input, kInput32bits##input); \ | |
1735 } | |
1736 | |
// Combines the 4S and 2D long tests only (no 8H destination).
1737 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ | |
1738 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ | |
1739 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) | |
1740 | |
// Full long set: combines the 8H, 4S and 2D long tests.
1741 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ | |
1742 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ | |
1743 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ | |
1744 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) | |
1745 | |
// Defines <mnemonic>_S: scalar S destination with H sources, using
// kInput32bitsAccDestination and kInput16bits##input.
1746 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ | |
1747 SIMTEST(mnemonic##_S) { \ | |
1748 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, kInput32bitsAccDestination, \ | |
1749 kInput16bits##input, kInput16bits##input); \ | |
1750 } | |
1751 | |
// Defines <mnemonic>_D: scalar D destination with S sources, using
// kInput64bitsAccDestination and kInput32bits##input.
1752 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ | |
1753 SIMTEST(mnemonic##_D) { \ | |
1754 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, kInput64bitsAccDestination, \ | |
1755 kInput32bits##input, kInput32bits##input); \ | |
1756 } | |
1757 | |
// Combines the scalar long S and D tests.
1758 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ | |
1759 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ | |
1760 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) | |
1761 | |
// Defines six SIMTESTs: <mnemonic>_8H/_4S/_2D and <mnemonic>2_8H/_4S/_2D.
// In each, the destination and first source share one arrangement while the
// second source uses lanes of half that width (e.g. 8H, 8H, 8B). The first
// source reads the array matching the destination width and the second source
// reads the array matching its own, narrower width.
1762 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ | |
1763 SIMTEST(mnemonic##_8H) { \ | |
1764 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \ | |
1765 kInput16bitsAccDestination, \ | |
1766 kInput16bits##input, kInput8bits##input); \ | |
1767 } \ | |
1768 SIMTEST(mnemonic##_4S) { \ | |
1769 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \ | |
1770 kInput32bitsAccDestination, \ | |
1771 kInput32bits##input, kInput16bits##input); \ | |
1772 } \ | |
1773 SIMTEST(mnemonic##_2D) { \ | |
1774 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \ | |
1775 kInput64bitsAccDestination, \ | |
1776 kInput64bits##input, kInput32bits##input); \ | |
1777 } \ | |
1778 SIMTEST(mnemonic##2_8H) { \ | |
1779 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \ | |
1780 kInput16bitsAccDestination, \ | |
1781 kInput16bits##input, kInput8bits##input); \ | |
1782 } \ | |
1783 SIMTEST(mnemonic##2_4S) { \ | |
1784 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \ | |
1785 kInput32bitsAccDestination, \ | |
1786 kInput32bits##input, kInput16bits##input); \ | |
1787 } \ | |
1788 SIMTEST(mnemonic##2_2D) { \ | |
1789 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \ | |
1790 kInput64bitsAccDestination, \ | |
1791 kInput64bits##input, kInput32bits##input); \ | |
1792 } | |
1793 | |
// Defines six SIMTESTs: <mnemonic>_8B/_4H/_2S and <mnemonic>2_16B/_8H/_4S.
// Both sources use lanes twice as wide as the destination (e.g. 8B with
// 8H, 8H). The destination input is the AccDestination array of the
// destination width and both sources read the array of the wider width.
1794 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ | |
1795 SIMTEST(mnemonic##_8B) { \ | |
1796 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \ | |
1797 kInput8bitsAccDestination, \ | |
1798 kInput16bits##input, kInput16bits##input); \ | |
1799 } \ | |
1800 SIMTEST(mnemonic##_4H) { \ | |
1801 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \ | |
1802 kInput16bitsAccDestination, \ | |
1803 kInput32bits##input, kInput32bits##input); \ | |
1804 } \ | |
1805 SIMTEST(mnemonic##_2S) { \ | |
1806 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \ | |
1807 kInput32bitsAccDestination, \ | |
1808 kInput64bits##input, kInput64bits##input); \ | |
1809 } \ | |
1810 SIMTEST(mnemonic##2_16B) { \ | |
1811 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \ | |
1812 kInput8bitsAccDestination, \ | |
1813 kInput16bits##input, kInput16bits##input); \ | |
1814 } \ | |
1815 SIMTEST(mnemonic##2_8H) { \ | |
1816 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \ | |
1817 kInput16bitsAccDestination, \ | |
1818 kInput32bits##input, kInput32bits##input); \ | |
1819 } \ | |
1820 SIMTEST(mnemonic##2_4S) { \ | |
1821 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \ | |
1822 kInput32bitsAccDestination, \ | |
1823 kInput64bits##input, kInput64bits##input); \ | |
1824 } | |
1825 | |
// Upper-case alias used by the DEFINE_TEST_NEON_2OPIMM* macros: wraps a call
// to CALL_TEST_NEON_HELPER_2OpImm in a braced block, passing every argument
// through unchanged. Because the expansion is a complete `{ ...; }` block, a
// trailing ';' at the use site is optional.
1826 #define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, vdform, vnform, input_n, \ | |
1827 input_imm) \ | |
1828 { \ | |
1829 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, input_n, \ | |
1830 input_imm); \ | |
1831 } | |
1832 | |
// Defines seven SIMTESTs named <mnemonic>_<arr>_2OPIMM for arr in
// 8B, 16B, 4H, 8H, 2S, 4S, 2D. Destination and source use the same
// arrangement; the source array (kInput<N>bits##input) and the immediate
// array (kInput<N>bitsImm##input_imm) both match the lane width.
1833 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ | |
1834 SIMTEST(mnemonic##_8B_2OPIMM) { \ | |
1835 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, kInput8bits##input, \ | |
1836 kInput8bitsImm##input_imm); \ | |
1837 } \ | |
1838 SIMTEST(mnemonic##_16B_2OPIMM) { \ | |
1839 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, kInput8bits##input, \ | |
1840 kInput8bitsImm##input_imm); \ | |
1841 } \ | |
1842 SIMTEST(mnemonic##_4H_2OPIMM) { \ | |
1843 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, \ | |
1844 kInput16bitsImm##input_imm); \ | |
1845 } \ | |
1846 SIMTEST(mnemonic##_8H_2OPIMM) { \ | |
1847 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, \ | |
1848 kInput16bitsImm##input_imm); \ | |
1849 } \ | |
1850 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
1851 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \ | |
1852 kInput32bitsImm##input_imm); \ | |
1853 } \ | |
1854 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
1855 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \ | |
1856 kInput32bitsImm##input_imm); \ | |
1857 } \ | |
1858 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
1859 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \ | |
1860 kInput64bitsImm##input_imm); \ | |
1861 } | |
1862 | |
// Same seven test names as DEFINE_TEST_NEON_2OPIMM, but the source form is the
// scalar lane type (B, H, S or D) while the destination is a vector
// arrangement (e.g. 8B from B). Source and immediate arrays match the lane
// width.
1863 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ | |
1864 SIMTEST(mnemonic##_8B_2OPIMM) { \ | |
1865 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, kInput8bits##input, \ | |
1866 kInput8bitsImm##input_imm); \ | |
1867 } \ | |
1868 SIMTEST(mnemonic##_16B_2OPIMM) { \ | |
1869 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, kInput8bits##input, \ | |
1870 kInput8bitsImm##input_imm); \ | |
1871 } \ | |
1872 SIMTEST(mnemonic##_4H_2OPIMM) { \ | |
1873 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, kInput16bits##input, \ | |
1874 kInput16bitsImm##input_imm); \ | |
1875 } \ | |
1876 SIMTEST(mnemonic##_8H_2OPIMM) { \ | |
1877 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, kInput16bits##input, \ | |
1878 kInput16bitsImm##input_imm); \ | |
1879 } \ | |
1880 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
1881 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, kInput32bits##input, \ | |
1882 kInput32bitsImm##input_imm); \ | |
1883 } \ | |
1884 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
1885 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, kInput32bits##input, \ | |
1886 kInput32bitsImm##input_imm); \ | |
1887 } \ | |
1888 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
1889 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, kInput64bits##input, \ | |
1890 kInput64bitsImm##input_imm); \ | |
1891 } | |
1892 | |
// Defines six SIMTESTs: <mnemonic>_8B/_4H/_2S_2OPIMM and
// <mnemonic>2_16B/_8H/_4S_2OPIMM. The source arrangement has lanes twice as
// wide as the destination. The source array matches the source (wider) lane
// width, while the immediate array matches the destination (narrower) width.
1893 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ | |
1894 SIMTEST(mnemonic##_8B_2OPIMM) { \ | |
1895 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H, kInput16bits##input, \ | |
1896 kInput8bitsImm##input_imm); \ | |
1897 } \ | |
1898 SIMTEST(mnemonic##_4H_2OPIMM) { \ | |
1899 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S, kInput32bits##input, \ | |
1900 kInput16bitsImm##input_imm); \ | |
1901 } \ | |
1902 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
1903 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D, kInput64bits##input, \ | |
1904 kInput32bitsImm##input_imm); \ | |
1905 } \ | |
1906 SIMTEST(mnemonic##2_16B_2OPIMM) { \ | |
1907 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H, kInput16bits##input, \ | |
1908 kInput8bitsImm##input_imm); \ | |
1909 } \ | |
1910 SIMTEST(mnemonic##2_8H_2OPIMM) { \ | |
1911 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S, kInput32bits##input, \ | |
1912 kInput16bitsImm##input_imm); \ | |
1913 } \ | |
1914 SIMTEST(mnemonic##2_4S_2OPIMM) { \ | |
1915 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D, kInput64bits##input, \ | |
1916 kInput32bitsImm##input_imm); \ | |
1917 } | |
1918 | |
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_NARROW: defines
// <mnemonic>_B/_H/_S_2OPIMM with sources H, S and D. The source array matches
// the source width; the immediate array matches the destination width.
1919 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ | |
1920 SIMTEST(mnemonic##_B_2OPIMM) { \ | |
1921 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H, kInput16bits##input, \ | |
1922 kInput8bitsImm##input_imm); \ | |
1923 } \ | |
1924 SIMTEST(mnemonic##_H_2OPIMM) { \ | |
1925 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S, kInput32bits##input, \ | |
1926 kInput16bitsImm##input_imm); \ | |
1927 } \ | |
1928 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
1929 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D, kInput64bits##input, \ | |
1930 kInput32bitsImm##input_imm); \ | |
1931 } | |
1932 | |
// Defines <mnemonic>_2S/_4S/_2D_2OPIMM for floating-point operations taking a
// floating-point immediate: all three tests read the immediate from
// kInputDoubleImm##input_imm, regardless of lane width.
// NOTE(review): the _2S test pastes the literal `Basic` (kInputFloat##Basic)
// instead of `input`, so `input` is ignored for that variant while _4S and _2D
// honor it. Confirm whether this is intentional before changing it; expected
// results generated elsewhere may depend on the inputs currently used.
// The _2S call also has no trailing ';', which is harmless because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
1933 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ | |
1934 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
1935 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \ | |
1936 kInputDoubleImm##input_imm) \ | |
1937 } \ | |
1938 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
1939 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \ | |
1940 kInputDoubleImm##input_imm); \ | |
1941 } \ | |
1942 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
1943 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \ | |
1944 kInputDoubleImm##input_imm); \ | |
1945 } | |
1946 | |
// Defines <mnemonic>_2S/_4S/_2D_2OPIMM for floating-point sources combined
// with an integer immediate array (kInput32bitsImm for the S arrangements,
// kInput64bitsImm for 2D).
// NOTE(review): the _2S test pastes the literal `Basic` (kInputFloat##Basic)
// instead of `input`, so `input` is ignored for that variant while _4S and _2D
// honor it. Confirm whether this is intentional before changing it; expected
// results generated elsewhere may depend on the inputs currently used.
// None of the calls has a trailing ';', which is harmless because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
1947 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ | |
1948 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
1949 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \ | |
1950 kInput32bitsImm##input_imm) \ | |
1951 } \ | |
1952 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
1953 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \ | |
1954 kInput32bitsImm##input_imm) \ | |
1955 } \ | |
1956 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
1957 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \ | |
1958 kInput64bitsImm##input_imm) \ | |
1959 } | |
1960 | |
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_FP: defines
// <mnemonic>_S_2OPIMM and <mnemonic>_D_2OPIMM with integer immediate arrays of
// matching width.
// NOTE(review): the _S test pastes the literal `Basic` (kInputFloat##Basic)
// instead of `input`, so `input` only affects the _D test. Confirm whether
// this is intentional before changing it; expected results generated
// elsewhere may depend on the inputs currently used.
// Neither call has a trailing ';', which is harmless because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
1961 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ | |
1962 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
1963 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##Basic, \ | |
1964 kInput32bitsImm##input_imm) \ | |
1965 } \ | |
1966 SIMTEST(mnemonic##_D_2OPIMM) { \ | |
1967 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \ | |
1968 kInput64bitsImm##input_imm) \ | |
1969 } | |
1970 | |
// Subset of DEFINE_TEST_NEON_2OPIMM limited to the 2S, 4S and 2D
// arrangements, reading the integer source and immediate arrays of matching
// width. The generated test names are the same as the corresponding ones from
// DEFINE_TEST_NEON_2OPIMM.
1971 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ | |
1972 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
1973 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \ | |
1974 kInput32bitsImm##input_imm); \ | |
1975 } \ | |
1976 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
1977 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \ | |
1978 kInput32bitsImm##input_imm); \ | |
1979 } \ | |
1980 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
1981 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \ | |
1982 kInput64bitsImm##input_imm); \ | |
1983 } | |
1984 | |
// Defines only <mnemonic>_D_2OPIMM: scalar D destination and source with the
// 64-bit integer source and immediate arrays.
1985 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ | |
1986 SIMTEST(mnemonic##_D_2OPIMM) { \ | |
1987 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, \ | |
1988 kInput64bitsImm##input_imm); \ | |
1989 } | |
1990 | |
// Defines <mnemonic>_S_2OPIMM (32-bit arrays) and, through
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D, <mnemonic>_D_2OPIMM.
1991 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \ | |
1992 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
1993 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, \ | |
1994 kInput32bitsImm##input_imm); \ | |
1995 } \ | |
1996 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) | |
1997 | |
// Defines only <mnemonic>_D_2OPIMM for a floating-point scalar: reads
// kInputDouble##input and takes the immediate from kInputDoubleImm##input_imm.
1998 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ | |
1999 SIMTEST(mnemonic##_D_2OPIMM) { \ | |
2000 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \ | |
2001 kInputDoubleImm##input_imm); \ | |
2002 } | |
2003 | |
// Defines <mnemonic>_S_2OPIMM (kInputFloat##input) and, through
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D, <mnemonic>_D_2OPIMM. Both take the
// immediate from kInputDoubleImm##input_imm.
2004 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \ | |
2005 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
2006 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, \ | |
2007 kInputDoubleImm##input_imm); \ | |
2008 } \ | |
2009 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) | |
2010 | |
// Full integer scalar set: defines <mnemonic>_B_2OPIMM and
// <mnemonic>_H_2OPIMM here and adds the _S and _D tests through
// DEFINE_TEST_NEON_2OPIMM_SCALAR_SD.
2011 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ | |
2012 SIMTEST(mnemonic##_B_2OPIMM) { \ | |
2013 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, \ | |
2014 kInput8bitsImm##input_imm); \ | |
2015 } \ | |
2016 SIMTEST(mnemonic##_H_2OPIMM) { \ | |
2017 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, \ | |
2018 kInput16bitsImm##input_imm); \ | |
2019 } \ | |
2020 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) | |
2021 | |
// Defines six SIMTESTs: <mnemonic>_8H/_4S/_2D_2OPIMM and
// <mnemonic>2_8H/_4S/_2D_2OPIMM. The source arrangement has lanes half as wide
// as the destination; both the source array and the immediate array match the
// source (narrower) lane width.
2022 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ | |
2023 SIMTEST(mnemonic##_8H_2OPIMM) { \ | |
2024 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, \ | |
2025 kInput8bitsImm##input_imm); \ | |
2026 } \ | |
2027 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
2028 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, \ | |
2029 kInput16bitsImm##input_imm); \ | |
2030 } \ | |
2031 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
2032 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, \ | |
2033 kInput32bitsImm##input_imm); \ | |
2034 } \ | |
2035 SIMTEST(mnemonic##2_8H_2OPIMM) { \ | |
2036 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, \ | |
2037 kInput8bitsImm##input_imm); \ | |
2038 } \ | |
2039 SIMTEST(mnemonic##2_4S_2OPIMM) { \ | |
2040 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, \ | |
2041 kInput16bitsImm##input_imm); \ | |
2042 } \ | |
2043 SIMTEST(mnemonic##2_2D_2OPIMM) { \ | |
2044 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, \ | |
2045 kInput32bitsImm##input_imm); \ | |
2046 } | |
2047 | |
// Upper-case alias used by the *_BYELEMENT* test macros: wraps a call to
// CALL_TEST_NEON_HELPER_ByElement in a braced block, passing every argument
// (three register forms, three input arrays and the index array) through
// unchanged.
2048 #define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, vdform, vnform, vmform, \ | |
2049 input_d, input_n, input_m, indices) \ | |
2050 { \ | |
2051 CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, input_d, \ | |
2052 input_n, input_m, indices); \ | |
2053 } | |
2054 | |
// Defines four by-element SIMTESTs named <mnemonic>_<vd>_<vn>_<vm>:
// 4H_4H_H, 8H_8H_H, 2S_2S_S and 4S_4S_S. Destination and first source share a
// vector arrangement; the second source is the scalar lane type. The H tests
// use the 16-bit arrays with kInputHIndices; the S tests use the 32-bit arrays
// with kInputSIndices. Unlike most macros in this group, the destination,
// first-source and second-source array suffixes are supplied separately.
2055 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ | |
2056 SIMTEST(mnemonic##_4H_4H_H) { \ | |
2057 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2058 mnemonic, 4H, 4H, H, kInput16bits##input_d, kInput16bits##input_n, \ | |
2059 kInput16bits##input_m, kInputHIndices); \ | |
2060 } \ | |
2061 SIMTEST(mnemonic##_8H_8H_H) { \ | |
2062 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2063 mnemonic, 8H, 8H, H, kInput16bits##input_d, kInput16bits##input_n, \ | |
2064 kInput16bits##input_m, kInputHIndices); \ | |
2065 } \ | |
2066 SIMTEST(mnemonic##_2S_2S_S) { \ | |
2067 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2068 mnemonic, 2S, 2S, S, kInput32bits##input_d, kInput32bits##input_n, \ | |
2069 kInput32bits##input_m, kInputSIndices); \ | |
2070 } \ | |
2071 SIMTEST(mnemonic##_4S_4S_S) { \ | |
2072 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2073 mnemonic, 4S, 4S, S, kInput32bits##input_d, kInput32bits##input_n, \ | |
2074 kInput32bits##input_m, kInputSIndices); \ | |
2075 } | |
2076 | |
// Scalar by-element tests: defines <mnemonic>_H_H_H (16-bit arrays,
// kInputHIndices) and <mnemonic>_S_S_S (32-bit arrays, kInputSIndices).
2077 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \ | |
2078 SIMTEST(mnemonic##_H_H_H) { \ | |
2079 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInput16bits##input_d, \ | |
2080 kInput16bits##input_n, \ | |
2081 kInput16bits##input_m, kInputHIndices); \ | |
2082 } \ | |
2083 SIMTEST(mnemonic##_S_S_S) { \ | |
2084 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInput32bits##input_d, \ | |
2085 kInput32bits##input_n, \ | |
2086 kInput32bits##input_m, kInputSIndices); \ | |
2087 } | |
2088 | |
// Floating-point by-element tests: defines <mnemonic>_2S_2S_S and
// <mnemonic>_4S_4S_S (float arrays, kInputSIndices) and <mnemonic>_2D_2D_D
// (double arrays, kInputDIndices).
2089 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ | |
2090 SIMTEST(mnemonic##_2S_2S_S) { \ | |
2091 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInputFloat##input_d, \ | |
2092 kInputFloat##input_n, \ | |
2093 kInputFloat##input_m, kInputSIndices); \ | |
2094 } \ | |
2095 SIMTEST(mnemonic##_4S_4S_S) { \ | |
2096 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInputFloat##input_d, \ | |
2097 kInputFloat##input_n, \ | |
2098 kInputFloat##input_m, kInputSIndices); \ | |
2099 } \ | |
2100 SIMTEST(mnemonic##_2D_2D_D) { \ | |
2101 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2102 mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, \ | |
2103 kInputDouble##input_m, kInputDIndices); \ | |
2104 } | |
2105 | |
// Floating-point scalar by-element tests: defines <mnemonic>_S_S_S (float
// arrays, kInputSIndices) and <mnemonic>_D_D_D (double arrays,
// kInputDIndices).
2106 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ | |
2107 SIMTEST(mnemonic##_S_S_S) { \ | |
2108 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInputFloat##inp_d, \ | |
2109 kInputFloat##inp_n, kInputFloat##inp_m, \ | |
2110 kInputSIndices); \ | |
2111 } \ | |
2112 SIMTEST(mnemonic##_D_D_D) { \ | |
2113 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, kInputDouble##inp_d, \ | |
2114 kInputDouble##inp_n, kInputDouble##inp_m, \ | |
2115 kInputDIndices); \ | |
2116 } | |
2117 | |
// By-element tests whose destination lanes are twice as wide as the source
// lanes: defines <mnemonic>_4S_4H_H, <mnemonic>2_4S_8H_H, <mnemonic>_2D_2S_S
// and <mnemonic>2_2D_4S_S. The destination array matches the destination
// width, the two source arrays match the source width, and the index array
// (kInputHIndices or kInputSIndices) matches the element source type.
2118 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ | |
2119 SIMTEST(mnemonic##_4S_4H_H) { \ | |
2120 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2121 mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, \ | |
2122 kInput16bits##input_m, kInputHIndices); \ | |
2123 } \ | |
2124 SIMTEST(mnemonic##2_4S_8H_H) { \ | |
2125 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2126 mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, \ | |
2127 kInput16bits##input_m, kInputHIndices); \ | |
2128 } \ | |
2129 SIMTEST(mnemonic##_2D_2S_S) { \ | |
2130 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2131 mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, \ | |
2132 kInput32bits##input_m, kInputSIndices); \ | |
2133 } \ | |
2134 SIMTEST(mnemonic##2_2D_4S_S) { \ | |
2135 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2136 mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, \ | |
2137 kInput32bits##input_m, kInputSIndices); \ | |
2138 } | |
2139 | |
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT_DIFF: defines
// <mnemonic>_S_H_H (32-bit destination array, 16-bit source arrays,
// kInputHIndices) and <mnemonic>_D_S_S (64-bit destination array, 32-bit
// source arrays, kInputSIndices).
2140 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, \ | |
2141 input_m) \ | |
2142 SIMTEST(mnemonic##_S_H_H) { \ | |
2143 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, kInput32bits##input_d, \ | |
2144 kInput16bits##input_n, \ | |
2145 kInput16bits##input_m, kInputHIndices); \ | |
2146 } \ | |
2147 SIMTEST(mnemonic##_D_S_S) { \ | |
2148 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, kInput64bits##input_d, \ | |
2149 kInput32bits##input_n, \ | |
2150 kInput32bits##input_m, kInputSIndices); \ | |
2151 } | |
2152 | |
// Helper for instructions taking two registers and two immediates: forwards
// to CALL_TEST_NEON_HELPER_OpImmOpImm, passing both the member-function
// pointer &MacroAssembler::mnemonic and the bare mnemonic, and using
// `variant` for both register forms.
2153 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant, input_d, input_imm1, \ | |
2154 input_n, input_imm2) \ | |
2155 { \ | |
2156 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, \ | |
2157 variant, variant, input_d, input_imm1, \ | |
2158 input_n, input_imm2); \ | |
2159 } | |
2160 | |
// Defines <mnemonic>_B/_H/_S/_D via CALL_TEST_NEON_HELPER_2OP2IMM. Note that
// the test names carry only the lane letter while the variants passed are the
// vector arrangements 16B, 8H, 4S and 2D. Each test reads the destination,
// source and both immediate arrays of the matching lane width.
2161 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, \ | |
2162 input_imm2) \ | |
2163 SIMTEST(mnemonic##_B) { \ | |
2164 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2165 mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, \ | |
2166 kInput8bits##input_n, kInput8bitsImm##input_imm2); \ | |
2167 } \ | |
2168 SIMTEST(mnemonic##_H) { \ | |
2169 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2170 mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, \ | |
2171 kInput16bits##input_n, kInput16bitsImm##input_imm2); \ | |
2172 } \ | |
2173 SIMTEST(mnemonic##_S) { \ | |
2174 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2175 mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, \ | |
2176 kInput32bits##input_n, kInput32bitsImm##input_imm2); \ | |
2177 } \ | |
2178 SIMTEST(mnemonic##_D) { \ | |
2179 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2180 mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, \ | |
2181 kInput64bits##input_n, kInput64bitsImm##input_imm2); \ | |
2182 } | |
2183 | |
2184 // clang-format on | |
2185 | |
// Instantiates the `ins` tests (ins_B/_H/_S/_D) and the vector `dup` tests
// (dup_8B_2OPIMM ... dup_2D_2OPIMM), using the Basic input arrays and the
// LaneCountFromZero immediate arrays.
2186 // Advanced SIMD copy. | |
2187 DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic, | |
2188 LaneCountFromZero) | |
2189 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) | |
2190 | |
// Instantiates the scalar `dup` tests (dup_B/_H/_S/_D_2OPIMM). These names do
// not clash with the vector `dup` tests, which carry a lane count (dup_8B_...).
2191 // Advanced SIMD scalar copy. | |
2192 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) | |
2193 | |
// One line per instruction; every entry uses the Basic input arrays. The macro
// chosen selects which arrangements are generated: _3SAME covers 8B..2D,
// _NO2D omits 2D, _HS covers 4H/8H/2S/4S, _8B_16B covers the byte
// arrangements only, and _FP covers 2S/4S/2D with the float/double arrays.
2194 // Advanced SIMD three same. | |
2195 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic) | |
2196 DEFINE_TEST_NEON_3SAME(sqadd, Basic) | |
2197 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic) | |
2198 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic) | |
2199 DEFINE_TEST_NEON_3SAME(sqsub, Basic) | |
2200 DEFINE_TEST_NEON_3SAME(cmgt, Basic) | |
2201 DEFINE_TEST_NEON_3SAME(cmge, Basic) | |
2202 DEFINE_TEST_NEON_3SAME(sshl, Basic) | |
2203 DEFINE_TEST_NEON_3SAME(sqshl, Basic) | |
2204 DEFINE_TEST_NEON_3SAME(srshl, Basic) | |
2205 DEFINE_TEST_NEON_3SAME(sqrshl, Basic) | |
2206 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic) | |
2207 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic) | |
2208 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic) | |
2209 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic) | |
2210 DEFINE_TEST_NEON_3SAME(add, Basic) | |
2211 DEFINE_TEST_NEON_3SAME(cmtst, Basic) | |
2212 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic) | |
2213 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic) | |
2214 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic) | |
2215 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic) | |
2216 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic) | |
2217 DEFINE_TEST_NEON_3SAME(addp, Basic) | |
2218 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic) | |
2219 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic) | |
2220 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic) | |
2221 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic) | |
2222 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic) | |
2223 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic) | |
2224 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic) | |
2225 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic) | |
2226 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic) | |
2227 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic) | |
2228 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic) | |
2229 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic) | |
2230 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic) | |
2231 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic) | |
2232 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic) | |
2233 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic) | |
2234 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic) | |
2235 DEFINE_TEST_NEON_3SAME(uqadd, Basic) | |
2236 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic) | |
2237 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic) | |
2238 DEFINE_TEST_NEON_3SAME(uqsub, Basic) | |
2239 DEFINE_TEST_NEON_3SAME(cmhi, Basic) | |
2240 DEFINE_TEST_NEON_3SAME(cmhs, Basic) | |
2241 DEFINE_TEST_NEON_3SAME(ushl, Basic) | |
2242 DEFINE_TEST_NEON_3SAME(uqshl, Basic) | |
2243 DEFINE_TEST_NEON_3SAME(urshl, Basic) | |
2244 DEFINE_TEST_NEON_3SAME(uqrshl, Basic) | |
2245 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic) | |
2246 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic) | |
2247 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic) | |
2248 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic) | |
2249 DEFINE_TEST_NEON_3SAME(sub, Basic) | |
2250 DEFINE_TEST_NEON_3SAME(cmeq, Basic) | |
2251 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic) | |
2252 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic) | |
2253 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic) | |
2254 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic) | |
2255 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic) | |
2256 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic) | |
2257 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic) | |
2258 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic) | |
2259 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic) | |
2260 DEFINE_TEST_NEON_3SAME_FP(facge, Basic) | |
2261 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic) | |
2262 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic) | |
2263 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic) | |
2264 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic) | |
2265 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic) | |
2266 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic) | |
2267 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic) | |
2268 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic) | |
2269 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic) | |
2270 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic) | |
2271 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic) | |
2272 | |
// Scalar forms of the three-same tests, all using the Basic input arrays.
// _SCALAR generates B/H/S/D tests, _SCALAR_D only D, _SCALAR_HS only H and S,
// and _FP_SCALAR generates S and D tests from the float/double arrays. The
// generated names (e.g. sqadd_B) differ from the vector ones (e.g. sqadd_8B),
// so a mnemonic may appear both here and in the vector list.
2273 // Advanced SIMD scalar three same. | |
2274 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic) | |
2275 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic) | |
2276 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic) | |
2277 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic) | |
2278 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic) | |
2279 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic) | |
2280 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic) | |
2281 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic) | |
2282 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic) | |
2283 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic) | |
2284 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic) | |
2285 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic) | |
2286 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic) | |
2287 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic) | |
2288 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic) | |
2289 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic) | |
2290 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic) | |
2291 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic) | |
2292 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic) | |
2293 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic) | |
2294 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic) | |
2295 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic) | |
2296 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic) | |
2297 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic) | |
2298 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic) | |
2299 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic) | |
2300 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic) | |
2301 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic) | |
2302 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic) | |
2303 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic) | |
2304 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic) | |
2305 | |
2306 // Advanced SIMD three different. | |
// One test family per mnemonic, all driven by the `Basic` input set. The
// DEFINE_TEST_NEON_3DIFF_* macros are defined outside this section.
// NOTE(review): LONG / WIDE / NARROW presumably name the operand-width pattern
// of the instruction, and the _SD / _8H suffixes presumably restrict the
// destination arrangements that are generated -- confirm in the macro
// definitions.
2307 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic) | |
2308 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic) | |
2309 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic) | |
2310 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic) | |
2311 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic) | |
2312 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic) | |
2313 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic) | |
2314 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic) | |
2315 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic) | |
2316 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic) | |
2317 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic) | |
2318 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic) | |
2319 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic) | |
2320 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic) | |
2321 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic) | |
2322 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic) | |
2323 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic) | |
2324 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic) | |
2325 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic) | |
2326 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic) | |
2327 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic) | |
2328 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic) | |
2329 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic) | |
2330 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic) | |
2331 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic) | |
2332 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic) | |
2333 | |
2334 // Advanced SIMD scalar three different. | |
// Scalar counterparts of the saturating doubling multiply (accumulate) long
// tests, each run on the `Basic` input set.
// NOTE(review): the _SD suffix presumably limits the destination to S and D
// scalar registers -- confirm in the macro definition (not visible here).
2335 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic) | |
2336 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic) | |
2337 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic) | |
2338 | |
2339 // Advanced SIMD scalar pairwise. | |
// Scalar pairwise ADDP test, written out by hand instead of through a
// DEFINE_TEST_NEON_* macro. It forwards the mnemonic, the two register formats
// (D and 2D) and the kInput64bitsBasic input table to the shared 2DIFF helper.
// NOTE(review): D is presumably the destination format and 2D the source
// format -- confirm against CALL_TEST_NEON_HELPER_2DIFF.
2340 SIMTEST(addp_SCALAR) { | |
2341 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic); | |
2342 } | |
// Floating-point scalar pairwise tests, one per mnemonic, on the `Basic` input
// set. NOTE(review): the _SD suffix presumably means both single- and
// double-precision forms are generated -- confirm in the macro definition.
2343 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic) | |
2344 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic) | |
2345 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic) | |
2346 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic) | |
2347 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic) | |
2348 | |
2349 // Advanced SIMD shift by immediate. | |
// Macro arguments are (mnemonic, input set, immediate set). Input sets used
// here are Basic, FixedPointConversions and Conversions; immediate sets are
// TypeWidth, TypeWidthFromZero and TypeWidthFromZeroToWidth.
// NOTE(review): the immediate-set name presumably selects the range of shift
// amounts / fractional bits that is iterated over -- confirm in the macro and
// input-table definitions, which are outside this section.
2350 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth) | |
2351 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth) | |
2352 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth) | |
2353 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth) | |
2354 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero) | |
2355 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero) | |
2356 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth) | |
2357 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth) | |
2358 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth) | |
2359 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth) | |
2360 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero) | |
2361 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, | |
2362 TypeWidthFromZeroToWidth) | |
2363 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth) | |
2364 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth) | |
2365 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth) | |
2366 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth) | |
2367 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth) | |
2368 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth) | |
2369 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero) | |
2370 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero) | |
2371 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero) | |
2372 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth) | |
2373 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth) | |
2374 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth) | |
2375 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth) | |
2376 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero) | |
2377 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, | |
2378 TypeWidthFromZeroToWidth) | |
2379 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth) | |
2380 | |
2381 // Advanced SIMD scalar shift by immediate. | |
// Scalar versions of the shift-by-immediate tests. Macro arguments are
// (mnemonic, input set, immediate set), with the same argument names as the
// vector list.
// NOTE(review): the _D / _NARROW / _SD / FP_ parts of the macro name presumably
// pick the scalar register sizes that are generated -- confirm in the macro
// definitions, which are outside this section.
2382 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth) | |
2383 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth) | |
2384 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth) | |
2385 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth) | |
2386 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero) | |
2387 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero) | |
2388 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth) | |
2389 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth) | |
2390 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, | |
2391 TypeWidthFromZeroToWidth) | |
2392 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth) | |
2393 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth) | |
2394 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth) | |
2395 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth) | |
2396 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth) | |
2397 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth) | |
2398 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero) | |
2399 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero) | |
2400 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero) | |
2401 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth) | |
2402 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth) | |
2403 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth) | |
2404 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth) | |
2405 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, | |
2406 TypeWidthFromZeroToWidth) | |
2407 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth) | |
2408 | |
2409 // Advanced SIMD two-register miscellaneous. | |
// One entry per mnemonic. Integer entries use the `Basic` input set; rounding
// and conversion entries use `Conversions`. Compare-against-zero entries go
// through the 2OPIMM macros with `Zero` as the immediate set, and shll passes
// `SHLL` as its immediate set.
// The interleaved "covered by" comments record why the integer forms of SCVTF,
// FCVTZS, UCVTF and FCVTZU have no entry of their own in this list.
// NOTE(review): the macro suffixes (_NO2D, _8B_16B, _2S_4S, _BH, ...) presumably
// restrict the vector arrangements that are generated -- confirm in the macro
// definitions, which are outside this section.
2410 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic) | |
2411 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic) | |
2412 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic) | |
2413 DEFINE_TEST_NEON_2SAME(suqadd, Basic) | |
2414 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic) | |
2415 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic) | |
2416 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic) | |
2417 DEFINE_TEST_NEON_2SAME(sqabs, Basic) | |
2418 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero) | |
2419 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero) | |
2420 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero) | |
2421 DEFINE_TEST_NEON_2SAME(abs, Basic) | |
2422 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic) | |
2423 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic) | |
2424 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions) | |
2425 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions) | |
2426 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions) | |
2427 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions) | |
2428 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions) | |
2429 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions) | |
2430 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions) | |
2431 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. | |
2432 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero) | |
2433 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero) | |
2434 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero) | |
2435 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic) | |
2436 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions) | |
2437 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions) | |
2438 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions) | |
2439 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. | |
2440 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic) | |
2441 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic) | |
2442 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic) | |
2443 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic) | |
2444 DEFINE_TEST_NEON_2SAME(usqadd, Basic) | |
2445 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic) | |
2446 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic) | |
2447 DEFINE_TEST_NEON_2SAME(sqneg, Basic) | |
2448 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero) | |
2449 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero) | |
2450 DEFINE_TEST_NEON_2SAME(neg, Basic) | |
2451 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic) | |
2452 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL) | |
2453 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic) | |
2454 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions) | |
2455 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions) | |
2456 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions) | |
2457 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions) | |
2458 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions) | |
2459 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions) | |
2460 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. | |
2461 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic) | |
2462 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic) | |
2463 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero) | |
2464 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero) | |
2465 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic) | |
2466 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions) | |
2467 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions) | |
2468 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. | |
2469 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic) | |
2470 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic) | |
2471 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic) | |
2472 | |
2473 // Advanced SIMD scalar two-register miscellaneous. | |
// Scalar forms of the two-register miscellaneous tests. Integer entries use the
// `Basic` input set, conversions use `Conversions`, and compares against zero
// pass `Zero` as the immediate set of a 2OPIMM macro.
// NOTE(review): the _D / _NARROW / _SD parts of the macro names presumably pick
// the scalar register sizes that are generated -- confirm in the macro
// definitions, which are outside this section.
2474 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic) | |
2475 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic) | |
2476 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero) | |
2477 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero) | |
2478 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero) | |
2479 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic) | |
2480 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic) | |
2481 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions) | |
2482 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions) | |
2483 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions) | |
2484 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. | |
2485 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero) | |
2486 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero) | |
2487 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero) | |
2488 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions) | |
2489 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. | |
2490 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic) | |
2491 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic) | |
2492 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic) | |
2493 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic) | |
2494 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero) | |
2495 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero) | |
2496 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic) | |
2497 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic) | |
2498 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic) | |
// Scalar FCVTXN test, written out by hand instead of through a
// DEFINE_TEST_NEON_* macro. It forwards the mnemonic, the two register formats
// (S and D) and the kInputDoubleConversions input table to the shared 2DIFF
// helper.
// NOTE(review): S is presumably the destination format and D the source
// format -- confirm against CALL_TEST_NEON_HELPER_2DIFF.
2499 SIMTEST(fcvtxn_SCALAR) { | |
2500 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions); | |
2501 } | |
// Scalar two-register miscellaneous, continued: remaining FP conversions on the
// `Conversions` input set, FP compares with `Zero` as the immediate set, and
// frsqrte on the `Basic` input set.
2502 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions) | |
2503 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions) | |
2504 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions) | |
2505 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. | |
2506 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero) | |
2507 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero) | |
2508 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions) | |
2509 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. | |
2510 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic) | |
2511 | |
2512 // Advanced SIMD across lanes. | |
// One entry per across-lanes mnemonic, all on the `Basic` input set. Three
// macro families are used: ACROSS, ACROSS_LONG and ACROSS_FP.
// NOTE(review): ACROSS_LONG presumably covers the widening add-across forms and
// ACROSS_FP the floating-point reductions -- confirm in the macro definitions,
// which are outside this section.
2513 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic) | |
2514 DEFINE_TEST_NEON_ACROSS(smaxv, Basic) | |
2515 DEFINE_TEST_NEON_ACROSS(sminv, Basic) | |
2516 DEFINE_TEST_NEON_ACROSS(addv, Basic) | |
2517 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic) | |
2518 DEFINE_TEST_NEON_ACROSS(umaxv, Basic) | |
2519 DEFINE_TEST_NEON_ACROSS(uminv, Basic) | |
2520 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic) | |
2521 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic) | |
2522 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic) | |
2523 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic) | |
2524 | |
2525 // Advanced SIMD permute. | |
// The six permute mnemonics (uzp1/2, trn1/2, zip1/2) are instantiated through
// the unsuffixed DEFINE_TEST_NEON_3SAME macro on the `Basic` input set; that
// macro is defined outside this section.
2526 DEFINE_TEST_NEON_3SAME(uzp1, Basic) | |
2527 DEFINE_TEST_NEON_3SAME(trn1, Basic) | |
2528 DEFINE_TEST_NEON_3SAME(zip1, Basic) | |
2529 DEFINE_TEST_NEON_3SAME(uzp2, Basic) | |
2530 DEFINE_TEST_NEON_3SAME(trn2, Basic) | |
2531 DEFINE_TEST_NEON_3SAME(zip2, Basic) | |
2532 | |
2533 // Advanced SIMD vector x indexed element. | |
// Each macro takes a mnemonic followed by three input-set names; every entry
// here passes `Basic` for all three.
// NOTE(review): the three input sets presumably feed the destination
// (accumulator), the first source vector and the indexed source element, in
// that order -- confirm in the DEFINE_TEST_NEON_*BYELEMENT* definitions, which
// are outside this section.
2534 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic) | |
2535 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic) | |
2536 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic) | |
2537 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic) | |
2538 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic) | |
2539 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic) | |
2540 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic) | |
2541 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic) | |
2542 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic) | |
2543 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic) | |
2544 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic) | |
2545 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic) | |
2546 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic) | |
2547 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic) | |
2548 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic) | |
2549 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic) | |
2550 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic) | |
2551 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic) | |
2552 | |
2553 // Advanced SIMD scalar x indexed element. | |
// Scalar forms of the by-element tests. Each macro takes a mnemonic followed by
// three input-set names; every entry here passes `Basic` for all three.
// NOTE(review): the three input sets presumably feed the destination
// (accumulator), the scalar source and the indexed source element, in that
// order -- confirm in the macro definitions, which are outside this section.
2554 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic) | |
2555 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic) | |
2556 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic) | |
2557 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic) | |
2558 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic) | |
2559 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic) | |
2560 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic) | |
2561 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic) | |
2562 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic) | |
OLD | NEW |