OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include <stdio.h> | |
6 #include <stdlib.h> | |
7 #include <string.h> | |
8 #include <cmath> | |
9 #include <limits> | |
10 | |
11 #include "src/arm64/decoder-arm64-inl.h" | |
12 #include "src/arm64/disasm-arm64.h" | |
13 #include "src/arm64/simulator-arm64.h" | |
14 #include "src/arm64/utils-arm64.h" | |
15 #include "src/base/platform/platform.h" | |
16 #include "src/base/utils/random-number-generator.h" | |
17 #include "src/macro-assembler.h" | |
18 #include "test/cctest/cctest.h" | |
19 #include "test/cctest/test-simulator-inputs-arm64.h" | |
20 #include "test/cctest/test-simulator-traces-arm64.h" | |
21 #include "test/cctest/test-utils-arm64.h" | |
22 | |
23 using namespace v8::internal; | |
24 | |
25 // Test infrastructure. | |
26 // | |
27 // Tests are functions which accept no parameters and have no return values. | |
28 // The testing code should not perform an explicit return once completed. For | |
29 // example to test the mov immediate instruction a very simple test would be: | |
30 // | |
31 // SIMTEST(mov_x0_one) { | |
32 // SETUP(); | |
33 // | |
34 // START(); | |
35 // __ mov(x0, Operand(1)); | |
36 // END(); | |
37 // | |
38 // RUN(); | |
39 // | |
40 // CHECK_EQUAL_64(1, x0); | |
41 // | |
42 // TEARDOWN(); | |
43 // } | |
44 // | |
45 // Within a START ... END block all registers but sp can be modified. sp has to | |
46 // be explicitly saved/restored. The END() macro replaces the function return | |
47 // so it may appear multiple times in a test if the test has multiple exit | |
48 // points. | |
49 // | |
50 // Once the test has been run all integer and floating point registers as well | |
51 // as flags are accessible through a RegisterDump instance, see | |
52 // test-utils-arm64.h for more info on RegisterDump. | |
bbudge
2017/01/31 01:41:32
It seems to be in test-utils-arm64.h
martyn.capewell
2017/02/03 11:01:31
Done.
| |
53 // | |
54 // We provide some helper asserts to handle common cases: | |
55 // | |
56 // CHECK_EQUAL_32(int32_t, int32_t) | |
bbudge
2017/01/31 01:41:32
nit int32_t
martyn.capewell
2017/02/03 11:01:31
Done.
| |
57 // CHECK_EQUAL_FP32(float, float) | |
58 // CHECK_EQUAL_32(int32_t, W register) | |
59 // CHECK_EQUAL_FP32(float, S register) | |
60 // CHECK_EQUAL_64(int64_t, int64_t) | |
bbudge
2017/01/31 01:41:32
int64_t
martyn.capewell
2017/02/03 11:01:31
Done.
| |
61 // CHECK_EQUAL_FP64(double, double) | |
62 // CHECK_EQUAL_64(int64_t, X register) | |
63 // CHECK_EQUAL_64(X register, X register) | |
64 // CHECK_EQUAL_FP64(double, D register) | |
65 // | |
66 // e.g. CHECK_EQUAL_FP64(0.5, d30); | |
67 // | |
68 // If more advanced computation is required before the assert then access the | |
69 // RegisterDump named core directly: | |
70 // | |
71 // CHECK_EQUAL_64(0x1234, core.xreg(0) & 0xffff); | |
72 | |
73 #if 0 // TODO(all): enable. | |
74 static v8::Persistent<v8::Context> env; | |
75 | |
76 static void InitializeVM() { | |
77 if (env.IsEmpty()) { | |
78 env = v8::Context::New(); | |
79 } | |
80 } | |
81 #endif | |
82 | |
83 #define __ masm. | |
84 #define SIMTEST(name) TEST(SIM_##name) | |
85 | |
86 #define BUF_SIZE 8192 | |
87 #define SETUP() SETUP_SIZE(BUF_SIZE) | |
88 | |
89 #define INIT_V8() CcTest::InitializeVM(); | |
90 | |
#ifdef USE_SIMULATOR

// Run tests with the simulator.
//
// SETUP_SIZE declares the locals that the other macros in this section refer
// to by name: 'isolate', 'scope', 'buf' (a heap array of buf_size bytes),
// 'masm' (assembling into 'buf'), 'decoder', 'simulator' and the RegisterDump
// 'core'.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  /* Validate the isolate before the HandleScope uses it. */   \
  CHECK(isolate != NULL);                                      \
  HandleScope scope(isolate);                                  \
  byte* buf = new byte[buf_size];                              \
  MacroAssembler masm(isolate, buf, buf_size,                  \
                      v8::internal::CodeObjectRequired::kYes); \
  Decoder<DispatchingDecoderVisitor>* decoder =                \
      new Decoder<DispatchingDecoderVisitor>();                \
  Simulator simulator(decoder);                                \
  RegisterDump core;

// Reset the assembler and simulator, so that instructions can be generated,
// but don't actually emit any code. This can be used by tests that need to
// emit instructions at the start of the buffer. Note that START_AFTER_RESET
// must be called before any callee-saved register is modified, and before an
// END is encountered.
//
// Most tests should call START, rather than call RESET directly.
#define RESET() \
  __ Reset();   \
  simulator.ResetState();

// Emits the test prologue: selects csp as the stack pointer, saves the
// callee-saved registers and emits a Debug marker that enables tracing.
#define START_AFTER_RESET()      \
  __ SetStackPointer(csp);       \
  __ PushCalleeSavedRegisters(); \
  __ Debug("Start test.", __LINE__, TRACE_ENABLE | LOG_ALL);

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Simulates the generated code starting at the beginning of 'buf'.
#define RUN() simulator.RunFrom(reinterpret_cast<Instruction*>(buf))

// Emits the test epilogue: disables tracing, dumps the register state into
// 'core', restores the callee-saved registers and returns.
#define END()                                               \
  __ Debug("End test.", __LINE__, TRACE_DISABLE | LOG_ALL); \
  core.Dump(&masm);                                         \
  __ PopCalleeSavedRegisters();                             \
  __ Ret();                                                 \
  __ GetCode(NULL);

// Releases the code buffer.
// NOTE(review): 'decoder' is not deleted here; presumably the Simulator takes
// ownership of it - confirm against simulator-arm64.
#define TEARDOWN() delete[] buf;

#else  // ifdef USE_SIMULATOR.
// Run the test on real hardware or models.
//
// SETUP_SIZE declares 'isolate', 'scope', 'actual_size', 'buf' (obtained from
// v8::base::OS::Allocate), 'masm' and the RegisterDump 'core'.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  /* Validate the isolate before the HandleScope uses it. */   \
  CHECK(isolate != NULL);                                      \
  HandleScope scope(isolate);                                  \
  size_t actual_size;                                          \
  byte* buf = static_cast<byte*>(                              \
      v8::base::OS::Allocate(buf_size, &actual_size, true));   \
  MacroAssembler masm(isolate, buf, actual_size,               \
                      v8::internal::CodeObjectRequired::kYes); \
  RegisterDump core;

#define RESET()                                                \
  __ Reset();                                                  \
  /* Reset the machine state (like simulator.ResetState()). */ \
  __ Msr(NZCV, xzr);                                           \
  __ Msr(FPCR, xzr);

// Emits the test prologue: selects csp as the stack pointer and saves the
// callee-saved registers.
#define START_AFTER_RESET() \
  __ SetStackPointer(csp);  \
  __ PushCalleeSavedRegisters();

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Flushes the instruction cache for the generated code and calls into 'buf'
// as a void(void) function. memcpy is used to convert the data pointer into a
// function pointer.
#define RUN()                                                       \
  Assembler::FlushICache(isolate, buf, masm.SizeOfGeneratedCode()); \
  {                                                                 \
    void (*test_function)(void);                                    \
    memcpy(&test_function, &buf, sizeof(buf));                      \
    test_function();                                                \
  }

// Emits the test epilogue: dumps the register state into 'core', restores the
// callee-saved registers and returns.
#define END()                   \
  core.Dump(&masm);             \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  __ GetCode(NULL);

// Releases the code buffer obtained in SETUP_SIZE.
#define TEARDOWN() v8::base::OS::Free(buf, actual_size);

#endif  // ifdef USE_SIMULATOR.
181 | |
// Result-checking helpers. Each one compares an expected value against the
// state captured in the RegisterDump named 'core', so they can only be used
// where 'core' is in scope (i.e. after SETUP()). Macro arguments are
// parenthesised so that compound expressions can be passed safely.
#define CHECK_EQUAL_NZCV(expected) \
  CHECK(EqualNzcv((expected), core.flags_nzcv()))

#define CHECK_EQUAL_REGISTERS(expected) \
  CHECK(EqualRegisters(&(expected), &core))

// The expected value is converted to uint32_t before the comparison.
#define CHECK_EQUAL_32(expected, result) \
  CHECK(Equal32(static_cast<uint32_t>(expected), &core, (result)))

#define CHECK_EQUAL_FP32(expected, result) \
  CHECK(EqualFP32((expected), &core, (result)))

#define CHECK_EQUAL_64(expected, result) \
  CHECK(Equal64((expected), &core, (result)))

#define CHECK_EQUAL_FP64(expected, result) \
  CHECK(EqualFP64((expected), &core, (result)))

// The literal pool size is only checked in DEBUG builds; in other builds the
// macro expands to a no-op expression.
#ifdef DEBUG
#define CHECK_LITERAL_POOL_SIZE(expected) \
  CHECK((expected) == (__ LiteralPoolSize()))
#else
#define CHECK_LITERAL_POOL_SIZE(expected) ((void)0)
#endif
203 | |
204 // The maximum number of errors to report in detail for each test. | |
205 static const unsigned kErrorReportLimit = 8; | |
206 | |
207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd, | |
208 const VRegister& vn); | |
209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd, | |
210 const VRegister& vn, | |
211 const VRegister& vm); | |
212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd, | |
213 const VRegister& vn, | |
214 const VRegister& vm, | |
215 int vm_index); | |
216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)( | |
217 const VRegister& vd, int imm1, const VRegister& vn, int imm2); | |
218 | |
219 // This helps using the same typename for both the function pointer | |
220 // and the array of immediates passed to helper routines. | |
221 template <typename T> | |
222 class Test2OpImmediateNEONHelper_t { | |
223 public: | |
224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd, | |
225 const VRegister& vn, T imm); | |
226 }; | |
227 | |
228 namespace { | |
229 | |
// Returns how many hex digits are needed to print a value of the wider of the
// two template types: four bits per digit, i.e. two digits per byte.
template <typename Ta, typename Tb>
unsigned MaxHexCharCount() {
  const size_t widest_bytes =
      (sizeof(Ta) < sizeof(Tb)) ? sizeof(Tb) : sizeof(Ta);
  const unsigned widest_bits = static_cast<unsigned>(widest_bytes) * 8;
  return widest_bits / 4;
}
237 | |
238 // ==== Tests for instructions of the form <INST> VReg, VReg. ==== | |
239 | |
240 void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, | |
241 unsigned inputs_n_length, uintptr_t results, | |
242 VectorFormat vd_form, VectorFormat vn_form) { | |
243 DCHECK_NE(vd_form, kFormatUndefined); | |
244 DCHECK_NE(vn_form, kFormatUndefined); | |
245 | |
246 SETUP(); | |
247 START(); | |
248 | |
249 // Roll up the loop to keep the code size down. | |
250 Label loop_n; | |
251 | |
252 Register out = x0; | |
253 Register inputs_n_base = x1; | |
254 Register inputs_n_last_16bytes = x3; | |
255 Register index_n = x5; | |
256 | |
257 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
258 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
259 | |
260 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
261 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
262 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
263 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
264 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
265 | |
266 // These will be either a D- or a Q-register form, with a single lane | |
267 // (for use in scalar load and store operations). | |
268 VRegister vd = VRegister::Create(0, vd_bits); | |
269 VRegister vn = v1.V16B(); | |
270 VRegister vntmp = v3.V16B(); | |
271 | |
272 // These will have the correct format for use when calling 'helper'. | |
273 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); | |
274 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
275 | |
276 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
277 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
278 | |
279 __ Mov(out, results); | |
280 | |
281 __ Mov(inputs_n_base, inputs_n); | |
282 __ Mov(inputs_n_last_16bytes, | |
283 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); | |
284 | |
285 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
286 | |
287 __ Mov(index_n, 0); | |
288 __ Bind(&loop_n); | |
289 | |
290 __ Ldr(vntmp_single, | |
291 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
292 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
293 | |
294 // Set the destination to zero. | |
295 | |
296 // TODO(all): Setting the destination to values other than zero might be a | |
297 // better test for instructions such as sqxtn2 which may leave parts of V | |
298 // registers unchanged. | |
299 __ Movi(vd.V16B(), 0); | |
300 | |
301 (masm.*helper)(vd_helper, vn_helper); | |
302 | |
303 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
304 | |
305 __ Add(index_n, index_n, 1); | |
306 __ Cmp(index_n, inputs_n_length); | |
307 __ B(lo, &loop_n); | |
308 | |
309 END(); | |
310 RUN(); | |
311 TEARDOWN(); | |
312 } | |
313 | |
314 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
315 // arrays of rawbit representation of input values. This ensures that | |
316 // exact bit comparisons can be performed. | |
317 template <typename Td, typename Tn> | |
318 void Test1OpNEON(const char* name, Test1OpNEONHelper_t helper, | |
319 const Tn inputs_n[], unsigned inputs_n_length, | |
320 const Td expected[], unsigned expected_length, | |
321 VectorFormat vd_form, VectorFormat vn_form) { | |
322 DCHECK_GT(inputs_n_length, 0U); | |
323 | |
324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
325 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
326 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
327 | |
328 const unsigned results_length = inputs_n_length; | |
329 Td* results = new Td[results_length * vd_lane_count]; | |
bbudge
2017/01/31 01:41:31
Use std::vector rather than manage raw array point
martyn.capewell
2017/02/03 11:01:31
I could do this, but I'd need to push dummy values
bbudge
2017/02/08 01:39:11
If the types Td are default constructible, you can
martyn.capewell
2017/02/15 11:51:00
Done.
| |
330 const unsigned lane_bit = sizeof(Td) * 8; | |
331 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
332 | |
333 Test1OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n), | |
334 inputs_n_length, reinterpret_cast<uintptr_t>(results), | |
335 vd_form, vn_form); | |
336 | |
337 if (CcTest::sim_test_trace()) { | |
338 // Print the results. | |
339 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
340 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
341 printf(" "); | |
342 // Output a separate result for each element of the result vector. | |
343 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
344 unsigned index = lane + (iteration * vd_lane_count); | |
345 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
346 static_cast<uint64_t>(results[index])); | |
347 } | |
348 printf("\n"); | |
349 } | |
350 | |
351 printf("};\n"); | |
352 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
353 results_length); | |
354 } else { | |
355 // Check the results. | |
356 CHECK(expected_length == results_length); | |
357 unsigned error_count = 0; | |
358 unsigned d = 0; | |
359 const char* padding = " "; | |
360 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); | |
361 for (unsigned n = 0; n < inputs_n_length; n++, d++) { | |
362 bool error_in_vector = false; | |
363 | |
364 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
365 unsigned output_index = (n * vd_lane_count) + lane; | |
366 | |
367 if (results[output_index] != expected[output_index]) { | |
368 error_in_vector = true; | |
369 break; | |
370 } | |
371 } | |
372 | |
373 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
374 printf("%s\n", name); | |
375 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, | |
376 lane_len_in_hex + 1, padding); | |
377 | |
378 const unsigned first_index_n = | |
379 inputs_n_length - (16 / vn_lane_bytes) + n + 1; | |
380 | |
381 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); | |
382 lane++) { | |
383 unsigned output_index = (n * vd_lane_count) + lane; | |
384 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; | |
385 | |
386 printf( | |
387 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
388 " " | |
389 "| 0x%0*" PRIx64 "\n", | |
390 results[output_index] != expected[output_index] ? '*' : ' ', | |
391 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
392 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
393 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
394 } | |
395 } | |
396 } | |
397 DCHECK_EQ(d, expected_length); | |
398 if (error_count > kErrorReportLimit) { | |
399 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
400 } | |
401 DCHECK_EQ(error_count, 0U); | |
402 } | |
403 delete[] results; | |
404 } | |
405 | |
406 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ==== | |
407 // where <V> is one of B, H, S or D registers. | |
408 // e.g. saddlv H1, v0.8B | |
409 | |
410 // TODO(all): Change tests to store all lanes of the resulting V register. | |
411 // Some tests store all 128 bits of the resulting V register to | |
412 // check the simulator's behaviour on the rest of the register. | |
413 // This is better than storing the affected lanes only. | |
414 // Change any tests such as the 'Across' template to do the same. | |
415 | |
416 void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n, | |
417 unsigned inputs_n_length, uintptr_t results, | |
418 VectorFormat vd_form, VectorFormat vn_form) { | |
419 DCHECK_NE(vd_form, kFormatUndefined); | |
420 DCHECK_NE(vn_form, kFormatUndefined); | |
421 | |
422 SETUP(); | |
423 START(); | |
424 | |
425 // Roll up the loop to keep the code size down. | |
426 Label loop_n; | |
427 | |
428 Register out = x0; | |
429 Register inputs_n_base = x1; | |
430 Register inputs_n_last_vector = x3; | |
431 Register index_n = x5; | |
432 | |
433 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
434 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
435 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
436 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
437 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
438 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
439 | |
440 // Test destructive operations by (arbitrarily) using the same register for | |
441 // B and S lane sizes. | |
442 bool destructive = (vd_bits == kBRegSize) || (vd_bits == kSRegSize); | |
443 | |
444 // These will be either a D- or a Q-register form, with a single lane | |
445 // (for use in scalar load and store operations). | |
446 // Create two aliases for v8; the first is the destination for the tested | |
447 // instruction, the second, the whole Q register to check the results. | |
448 VRegister vd = VRegister::Create(0, vd_bits); | |
449 VRegister vdstr = VRegister::Create(0, kQRegSizeInBits); | |
450 | |
451 VRegister vn = VRegister::Create(1, vn_bits); | |
452 VRegister vntmp = VRegister::Create(3, vn_bits); | |
453 | |
454 // These will have the correct format for use when calling 'helper'. | |
455 VRegister vd_helper = VRegister::Create(0, vn_bits, vn_lane_count); | |
456 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
457 | |
458 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
459 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
460 | |
461 // Same registers for use in the 'ext' instructions. | |
462 VRegister vn_ext = (kDRegSizeInBits == vn_bits) ? vn.V8B() : vn.V16B(); | |
463 VRegister vntmp_ext = | |
464 (kDRegSizeInBits == vn_bits) ? vntmp.V8B() : vntmp.V16B(); | |
465 | |
466 __ Mov(out, results); | |
467 | |
468 __ Mov(inputs_n_base, inputs_n); | |
469 __ Mov(inputs_n_last_vector, | |
470 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); | |
471 | |
472 __ Ldr(vn, MemOperand(inputs_n_last_vector)); | |
473 | |
474 __ Mov(index_n, 0); | |
475 __ Bind(&loop_n); | |
476 | |
477 __ Ldr(vntmp_single, | |
478 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
479 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); | |
480 | |
481 if (destructive) { | |
482 __ Mov(vd_helper, vn_helper); | |
483 (masm.*helper)(vd, vd_helper); | |
484 } else { | |
485 (masm.*helper)(vd, vn_helper); | |
486 } | |
487 | |
488 __ Str(vdstr, MemOperand(out, kQRegSize, PostIndex)); | |
489 | |
490 __ Add(index_n, index_n, 1); | |
491 __ Cmp(index_n, inputs_n_length); | |
492 __ B(lo, &loop_n); | |
493 | |
494 END(); | |
495 RUN(); | |
496 TEARDOWN(); | |
497 } | |
498 | |
499 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
500 // arrays of rawbit representation of input values. This ensures that | |
501 // exact bit comparisons can be performed. | |
502 template <typename Td, typename Tn> | |
503 void Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper, | |
504 const Tn inputs_n[], unsigned inputs_n_length, | |
505 const Td expected[], unsigned expected_length, | |
506 VectorFormat vd_form, VectorFormat vn_form) { | |
507 DCHECK_GT(inputs_n_length, 0U); | |
508 | |
509 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
510 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form); | |
511 | |
512 const unsigned results_length = inputs_n_length; | |
513 Td* results = new Td[results_length * vd_lanes_per_q]; | |
514 const unsigned lane_bit = sizeof(Td) * 8; | |
515 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
516 | |
517 Test1OpAcrossNEON_Helper( | |
518 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
519 reinterpret_cast<uintptr_t>(results), vd_form, vn_form); | |
520 | |
521 if (CcTest::sim_test_trace()) { | |
522 // Print the results. | |
523 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
524 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
525 printf(" "); | |
526 // Output a separate result for each element of the result vector. | |
527 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
528 unsigned index = lane + (iteration * vd_lane_count); | |
529 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
530 static_cast<uint64_t>(results[index])); | |
531 } | |
532 printf("\n"); | |
533 } | |
534 | |
535 printf("};\n"); | |
536 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
537 results_length); | |
538 } else { | |
539 // Check the results. | |
540 DCHECK_EQ(expected_length, results_length); | |
541 unsigned error_count = 0; | |
542 unsigned d = 0; | |
543 const char* padding = " "; | |
544 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1)); | |
545 for (unsigned n = 0; n < inputs_n_length; n++, d++) { | |
546 bool error_in_vector = false; | |
547 | |
548 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
549 unsigned expected_index = (n * vd_lane_count) + lane; | |
550 unsigned results_index = (n * vd_lanes_per_q) + lane; | |
551 | |
552 if (results[results_index] != expected[expected_index]) { | |
553 error_in_vector = true; | |
554 break; | |
555 } | |
556 | |
557 // For across operations, the remaining lanes should be zero. | |
558 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) { | |
559 unsigned results_index = (n * vd_lanes_per_q) + lane; | |
560 if (results[results_index] != 0) { | |
561 error_in_vector = true; | |
562 break; | |
563 } | |
564 } | |
565 } | |
566 | |
567 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
568 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
569 | |
570 printf("%s\n", name); | |
571 printf(" Vn%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, padding, | |
572 lane_len_in_hex + 1, padding); | |
573 | |
574 // TODO(all): In case of an error, all tests print out as many elements | |
575 // as there are lanes in the output or input vectors. This way the | |
576 // viewer can read all the values that were needed for the operation | |
577 // but the output contains also unnecessary values. These prints can be | |
578 // improved according to the arguments passed to test functions. | |
579 // This output for the 'Across' category has the required modifications. | |
bbudge
2017/01/31 01:41:31
It's not clear what is "to be done" here.
martyn.capewell
2017/02/03 11:01:31
I think it's saying that, on error, it prints out
bbudge
2017/02/08 01:39:11
OK
| |
580 for (unsigned lane = 0; lane < vn_lane_count; lane++) { | |
581 unsigned results_index = | |
582 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane); | |
583 unsigned input_index_n = | |
584 (inputs_n_length - vn_lane_count + n + 1 + lane) % | |
585 inputs_n_length; | |
586 | |
587 Td expect = 0; | |
588 if ((vn_lane_count - 1) == lane) { | |
589 // This is the last lane to be printed, ie. the least-significant | |
590 // lane, so use the expected value; any other lane should be zero. | |
591 unsigned expected_index = n * vd_lane_count; | |
592 expect = expected[expected_index]; | |
593 } | |
594 printf("%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
595 results[results_index] != expect ? '*' : ' ', lane_len_in_hex, | |
596 static_cast<uint64_t>(inputs_n[input_index_n]), | |
597 lane_len_in_hex, static_cast<uint64_t>(results[results_index]), | |
598 lane_len_in_hex, static_cast<uint64_t>(expect)); | |
599 } | |
600 } | |
601 } | |
602 DCHECK_EQ(d, expected_length); | |
603 if (error_count > kErrorReportLimit) { | |
604 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
605 } | |
606 DCHECK_EQ(error_count, 0U); | |
607 } | |
608 delete[] results; | |
609 } | |
610 | |
611 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ==== | |
612 | |
613 void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d, | |
614 uintptr_t inputs_n, unsigned inputs_n_length, | |
615 uintptr_t inputs_m, unsigned inputs_m_length, | |
616 uintptr_t results, VectorFormat vd_form, | |
617 VectorFormat vn_form, VectorFormat vm_form) { | |
618 DCHECK_NE(vd_form, kFormatUndefined); | |
619 DCHECK_NE(vn_form, kFormatUndefined); | |
620 DCHECK_NE(vm_form, kFormatUndefined); | |
621 | |
622 SETUP(); | |
623 START(); | |
624 | |
625 // Roll up the loop to keep the code size down. | |
626 Label loop_n, loop_m; | |
627 | |
628 Register out = x0; | |
629 Register inputs_n_base = x1; | |
630 Register inputs_m_base = x2; | |
631 Register inputs_d_base = x3; | |
632 Register inputs_n_last_16bytes = x4; | |
633 Register inputs_m_last_16bytes = x5; | |
634 Register index_n = x6; | |
635 Register index_m = x7; | |
636 | |
637 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
638 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
639 | |
640 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
641 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
642 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
643 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
644 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
645 | |
646 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); | |
647 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); | |
648 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); | |
649 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); | |
650 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); | |
651 | |
652 // Always load and store 128 bits regardless of the format. | |
653 VRegister vd = v0.V16B(); | |
654 VRegister vn = v1.V16B(); | |
655 VRegister vm = v2.V16B(); | |
656 VRegister vntmp = v3.V16B(); | |
657 VRegister vmtmp = v4.V16B(); | |
658 VRegister vres = v5.V16B(); | |
659 | |
660 // These will have the correct format for calling the 'helper'. | |
661 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
662 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); | |
663 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
664 | |
665 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
666 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
667 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); | |
668 | |
669 __ Mov(out, results); | |
670 | |
671 __ Mov(inputs_d_base, inputs_d); | |
672 | |
673 __ Mov(inputs_n_base, inputs_n); | |
674 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); | |
675 __ Mov(inputs_m_base, inputs_m); | |
676 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); | |
677 | |
678 __ Ldr(vd, MemOperand(inputs_d_base)); | |
679 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
680 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); | |
681 | |
682 __ Mov(index_n, 0); | |
683 __ Bind(&loop_n); | |
684 | |
685 __ Ldr(vntmp_single, | |
686 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
687 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
688 | |
689 __ Mov(index_m, 0); | |
690 __ Bind(&loop_m); | |
691 | |
692 __ Ldr(vmtmp_single, | |
693 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); | |
694 __ Ext(vm, vm, vmtmp, vm_lane_bytes); | |
695 | |
696 __ Mov(vres, vd); | |
697 | |
698 (masm.*helper)(vres_helper, vn_helper, vm_helper); | |
699 | |
700 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
701 | |
702 __ Add(index_m, index_m, 1); | |
703 __ Cmp(index_m, inputs_m_length); | |
704 __ B(lo, &loop_m); | |
705 | |
706 __ Add(index_n, index_n, 1); | |
707 __ Cmp(index_n, inputs_n_length); | |
708 __ B(lo, &loop_n); | |
709 | |
710 END(); | |
711 RUN(); | |
712 TEARDOWN(); | |
713 } | |
714 | |
715 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
716 // arrays of rawbit representation of input values. This ensures that | |
717 // exact bit comparisons can be performed. | |
718 template <typename Td, typename Tn, typename Tm> | |
719 void Test2OpNEON(const char* name, Test2OpNEONHelper_t helper, | |
720 const Td inputs_d[], const Tn inputs_n[], | |
721 unsigned inputs_n_length, const Tm inputs_m[], | |
722 unsigned inputs_m_length, const Td expected[], | |
723 unsigned expected_length, VectorFormat vd_form, | |
724 VectorFormat vn_form, VectorFormat vm_form) { | |
725 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); | |
726 | |
727 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); | |
728 | |
729 const unsigned results_length = inputs_n_length * inputs_m_length; | |
730 Td* results = new Td[results_length * vd_lane_count]; | |
731 const unsigned lane_bit = sizeof(Td) * 8; | |
732 const unsigned lane_len_in_hex = | |
733 static_cast<unsigned>(std::max(sizeof(Td), sizeof(Tm)) * 8) / 4; | |
734 | |
735 Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d), | |
736 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
737 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, | |
738 reinterpret_cast<uintptr_t>(results), vd_form, vn_form, | |
739 vm_form); | |
740 | |
741 if (CcTest::sim_test_trace()) { | |
742 // Print the results. | |
743 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
744 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
745 printf(" "); | |
746 // Output a separate result for each element of the result vector. | |
747 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
748 unsigned index = lane + (iteration * vd_lane_count); | |
749 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
750 static_cast<uint64_t>(results[index])); | |
751 } | |
752 printf("\n"); | |
753 } | |
754 | |
755 printf("};\n"); | |
756 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
757 results_length); | |
758 } else { | |
759 // Check the results. | |
760 CHECK(expected_length == results_length); | |
761 unsigned error_count = 0; | |
762 unsigned d = 0; | |
763 const char* padding = " "; | |
764 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
765 for (unsigned n = 0; n < inputs_n_length; n++) { | |
766 for (unsigned m = 0; m < inputs_m_length; m++, d++) { | |
767 bool error_in_vector = false; | |
768 | |
769 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
770 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
771 (m * vd_lane_count) + lane; | |
772 | |
773 if (results[output_index] != expected[output_index]) { | |
774 error_in_vector = true; | |
775 break; | |
776 } | |
777 } | |
778 | |
779 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
780 printf("%s\n", name); | |
781 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Vd%.*s| Expected\n", | |
782 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, | |
783 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); | |
784 | |
785 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
786 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
787 (m * vd_lane_count) + lane; | |
788 unsigned input_index_n = | |
789 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
790 inputs_n_length; | |
791 unsigned input_index_m = | |
792 (inputs_m_length - vd_lane_count + m + 1 + lane) % | |
793 inputs_m_length; | |
794 | |
795 printf( | |
796 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
797 " " | |
798 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
799 results[output_index] != expected[output_index] ? '*' : ' ', | |
800 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
801 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
802 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), | |
803 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
804 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
805 } | |
806 } | |
807 } | |
808 } | |
809 DCHECK_EQ(d, expected_length); | |
810 if (error_count > kErrorReportLimit) { | |
811 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
812 } | |
813 DCHECK_EQ(error_count, 0U); | |
814 } | |
815 delete[] results; | |
816 } | |
817 | |
818 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ==== | |
819 | |
820 void TestByElementNEON_Helper(TestByElementNEONHelper_t helper, | |
821 uintptr_t inputs_d, uintptr_t inputs_n, | |
822 unsigned inputs_n_length, uintptr_t inputs_m, | |
823 unsigned inputs_m_length, const int indices[], | |
824 unsigned indices_length, uintptr_t results, | |
825 VectorFormat vd_form, VectorFormat vn_form, | |
826 VectorFormat vm_form) { | |
827 DCHECK_NE(vd_form, kFormatUndefined); | |
828 DCHECK_NE(vn_form, kFormatUndefined); | |
829 DCHECK_NE(vm_form, kFormatUndefined); | |
830 | |
831 SETUP(); | |
832 START(); | |
833 | |
834 // Roll up the loop to keep the code size down. | |
835 Label loop_n, loop_m; | |
836 | |
837 Register out = x0; | |
838 Register inputs_n_base = x1; | |
839 Register inputs_m_base = x2; | |
840 Register inputs_d_base = x3; | |
841 Register inputs_n_last_16bytes = x4; | |
842 Register inputs_m_last_16bytes = x5; | |
843 Register index_n = x6; | |
844 Register index_m = x7; | |
845 | |
846 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
847 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
848 | |
849 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
850 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
851 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
852 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
853 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
854 | |
855 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form); | |
856 const unsigned vm_lane_count = LaneCountFromFormat(vm_form); | |
857 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form); | |
858 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form); | |
859 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form); | |
860 | |
861 // Always load and store 128 bits regardless of the format. | |
862 VRegister vd = v0.V16B(); | |
863 VRegister vn = v1.V16B(); | |
864 VRegister vm = v2.V16B(); | |
865 VRegister vntmp = v3.V16B(); | |
866 VRegister vmtmp = v4.V16B(); | |
867 VRegister vres = v5.V16B(); | |
868 | |
869 // These will have the correct format for calling the 'helper'. | |
870 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
871 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count); | |
872 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
873 | |
874 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
875 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
876 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits); | |
877 | |
878 __ Mov(out, results); | |
879 | |
880 __ Mov(inputs_d_base, inputs_d); | |
881 | |
882 __ Mov(inputs_n_base, inputs_n); | |
883 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16)); | |
884 __ Mov(inputs_m_base, inputs_m); | |
885 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16)); | |
886 | |
887 __ Ldr(vd, MemOperand(inputs_d_base)); | |
888 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
889 __ Ldr(vm, MemOperand(inputs_m_last_16bytes)); | |
890 | |
891 __ Mov(index_n, 0); | |
892 __ Bind(&loop_n); | |
893 | |
894 __ Ldr(vntmp_single, | |
895 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
896 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
897 | |
898 __ Mov(index_m, 0); | |
899 __ Bind(&loop_m); | |
900 | |
901 __ Ldr(vmtmp_single, | |
902 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2)); | |
903 __ Ext(vm, vm, vmtmp, vm_lane_bytes); | |
904 | |
905 __ Mov(vres, vd); | |
906 { | |
907 for (unsigned i = 0; i < indices_length; i++) { | |
908 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]); | |
909 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
910 } | |
911 } | |
912 | |
913 __ Add(index_m, index_m, 1); | |
914 __ Cmp(index_m, inputs_m_length); | |
915 __ B(lo, &loop_m); | |
916 | |
917 __ Add(index_n, index_n, 1); | |
918 __ Cmp(index_n, inputs_n_length); | |
919 __ B(lo, &loop_n); | |
920 | |
921 END(); | |
922 RUN(); | |
923 TEARDOWN(); | |
924 } | |
925 | |
926 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
927 // arrays of rawbit representation of input values. This ensures that | |
928 // exact bit comparisons can be performed. | |
929 template <typename Td, typename Tn, typename Tm> | |
930 void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper, | |
931 const Td inputs_d[], const Tn inputs_n[], | |
932 unsigned inputs_n_length, const Tm inputs_m[], | |
933 unsigned inputs_m_length, const int indices[], | |
934 unsigned indices_length, const Td expected[], | |
935 unsigned expected_length, VectorFormat vd_form, | |
936 VectorFormat vn_form, VectorFormat vm_form) { | |
937 DCHECK_GT(inputs_n_length, 0U); | |
938 DCHECK_GT(inputs_m_length, 0U); | |
939 DCHECK_GT(indices_length, 0U); | |
940 | |
941 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form); | |
942 | |
943 const unsigned results_length = | |
944 inputs_n_length * inputs_m_length * indices_length; | |
945 Td* results = new Td[results_length * vd_lane_count]; | |
946 const unsigned lane_bit = sizeof(Td) * 8; | |
947 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>(); | |
948 | |
949 TestByElementNEON_Helper( | |
950 helper, reinterpret_cast<uintptr_t>(inputs_d), | |
951 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length, | |
952 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, indices, | |
953 indices_length, reinterpret_cast<uintptr_t>(results), vd_form, vn_form, | |
954 vm_form); | |
955 | |
956 if (CcTest::sim_test_trace()) { | |
957 // Print the results. | |
958 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
959 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
960 printf(" "); | |
961 // Output a separate result for each element of the result vector. | |
962 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
963 unsigned index = lane + (iteration * vd_lane_count); | |
964 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
965 static_cast<uint64_t>(results[index])); | |
966 } | |
967 printf("\n"); | |
968 } | |
969 | |
970 printf("};\n"); | |
971 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
972 results_length); | |
973 } else { | |
974 // Check the results. | |
975 CHECK(expected_length == results_length); | |
976 unsigned error_count = 0; | |
977 unsigned d = 0; | |
978 const char* padding = " "; | |
979 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
980 for (unsigned n = 0; n < inputs_n_length; n++) { | |
981 for (unsigned m = 0; m < inputs_m_length; m++) { | |
982 for (unsigned index = 0; index < indices_length; index++, d++) { | |
983 bool error_in_vector = false; | |
984 | |
985 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
986 unsigned output_index = | |
987 (n * inputs_m_length * indices_length * vd_lane_count) + | |
988 (m * indices_length * vd_lane_count) + (index * vd_lane_count) + | |
989 lane; | |
990 | |
991 if (results[output_index] != expected[output_index]) { | |
992 error_in_vector = true; | |
993 break; | |
994 } | |
995 } | |
996 | |
997 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
998 printf("%s\n", name); | |
999 printf(" Vd%.*s| Vn%.*s| Vm%.*s| Index | Vd%.*s| Expected\n", | |
1000 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding, | |
1001 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding); | |
1002 | |
1003 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1004 unsigned output_index = | |
1005 (n * inputs_m_length * indices_length * vd_lane_count) + | |
1006 (m * indices_length * vd_lane_count) + | |
1007 (index * vd_lane_count) + lane; | |
1008 unsigned input_index_n = | |
1009 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
1010 inputs_n_length; | |
1011 unsigned input_index_m = | |
1012 (inputs_m_length - vd_lane_count + m + 1 + lane) % | |
1013 inputs_m_length; | |
1014 | |
1015 printf( | |
1016 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
1017 " " | |
1018 "| [%3d] | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
1019 results[output_index] != expected[output_index] ? '*' : ' ', | |
1020 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
1021 lane_len_in_hex, | |
1022 static_cast<uint64_t>(inputs_n[input_index_n]), | |
1023 lane_len_in_hex, | |
1024 static_cast<uint64_t>(inputs_m[input_index_m]), | |
1025 indices[index], lane_len_in_hex, | |
1026 static_cast<uint64_t>(results[output_index]), lane_len_in_hex, | |
1027 static_cast<uint64_t>(expected[output_index])); | |
1028 } | |
1029 } | |
1030 } | |
1031 } | |
1032 } | |
1033 DCHECK_EQ(d, expected_length); | |
1034 if (error_count > kErrorReportLimit) { | |
1035 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
1036 } | |
1037 CHECK(error_count == 0); | |
1038 } | |
1039 delete[] results; | |
1040 } | |
1041 | |
1042 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ==== | |
1043 | |
1044 template <typename Tm> | |
1045 void Test2OpImmNEON_Helper( | |
1046 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, | |
1047 uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[], | |
1048 unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form, | |
1049 VectorFormat vn_form) { | |
1050 DCHECK(vd_form != kFormatUndefined && vn_form != kFormatUndefined); | |
1051 | |
1052 SETUP(); | |
1053 START(); | |
1054 | |
1055 // Roll up the loop to keep the code size down. | |
1056 Label loop_n; | |
1057 | |
1058 Register out = x0; | |
1059 Register inputs_n_base = x1; | |
1060 Register inputs_n_last_16bytes = x3; | |
1061 Register index_n = x5; | |
1062 | |
1063 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
1064 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
1065 | |
1066 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
1067 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
1068 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
1069 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
1070 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
1071 | |
1072 // These will be either a D- or a Q-register form, with a single lane | |
1073 // (for use in scalar load and store operations). | |
1074 VRegister vd = VRegister::Create(0, vd_bits); | |
1075 VRegister vn = v1.V16B(); | |
1076 VRegister vntmp = v3.V16B(); | |
1077 | |
1078 // These will have the correct format for use when calling 'helper'. | |
1079 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count); | |
1080 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
1081 | |
1082 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
1083 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits); | |
1084 | |
1085 __ Mov(out, results); | |
1086 | |
1087 __ Mov(inputs_n_base, inputs_n); | |
1088 __ Mov(inputs_n_last_16bytes, | |
1089 inputs_n + (vn_lane_bytes * inputs_n_length) - 16); | |
1090 | |
1091 __ Ldr(vn, MemOperand(inputs_n_last_16bytes)); | |
1092 | |
1093 __ Mov(index_n, 0); | |
1094 __ Bind(&loop_n); | |
1095 | |
1096 __ Ldr(vntmp_single, | |
1097 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
1098 __ Ext(vn, vn, vntmp, vn_lane_bytes); | |
1099 | |
1100 // Set the destination to zero for tests such as '[r]shrn2'. | |
1101 // TODO(all): Setting the destination to values other than zero might be a | |
1102 // better test for shift and accumulate instructions (srsra/ssra/usra/ursra). | |
1103 __ Movi(vd.V16B(), 0); | |
1104 | |
1105 { | |
1106 for (unsigned i = 0; i < inputs_m_length; i++) { | |
1107 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]); | |
1108 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
1109 } | |
1110 } | |
1111 | |
1112 __ Add(index_n, index_n, 1); | |
1113 __ Cmp(index_n, inputs_n_length); | |
1114 __ B(lo, &loop_n); | |
1115 | |
1116 END(); | |
1117 RUN(); | |
1118 TEARDOWN(); | |
1119 } | |
1120 | |
1121 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
1122 // arrays of rawbit representation of input values. This ensures that | |
1123 // exact bit comparisons can be performed. | |
1124 template <typename Td, typename Tn, typename Tm> | |
1125 void Test2OpImmNEON(const char* name, | |
1126 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper, | |
1127 const Tn inputs_n[], unsigned inputs_n_length, | |
1128 const Tm inputs_m[], unsigned inputs_m_length, | |
1129 const Td expected[], unsigned expected_length, | |
1130 VectorFormat vd_form, VectorFormat vn_form) { | |
1131 DCHECK(inputs_n_length > 0 && inputs_m_length > 0); | |
1132 | |
1133 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
1134 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
1135 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
1136 | |
1137 const unsigned results_length = inputs_n_length * inputs_m_length; | |
1138 Td* results = new Td[results_length * vd_lane_count]; | |
1139 const unsigned lane_bit = sizeof(Td) * 8; | |
1140 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
1141 | |
1142 Test2OpImmNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n), | |
1143 inputs_n_length, inputs_m, inputs_m_length, | |
1144 reinterpret_cast<uintptr_t>(results), vd_form, vn_form); | |
1145 | |
1146 if (CcTest::sim_test_trace()) { | |
1147 // Print the results. | |
1148 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
1149 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
1150 printf(" "); | |
1151 // Output a separate result for each element of the result vector. | |
1152 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1153 unsigned index = lane + (iteration * vd_lane_count); | |
1154 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
1155 static_cast<uint64_t>(results[index])); | |
1156 } | |
1157 printf("\n"); | |
1158 } | |
1159 | |
1160 printf("};\n"); | |
1161 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
1162 results_length); | |
1163 } else { | |
1164 // Check the results. | |
1165 CHECK(expected_length == results_length); | |
1166 unsigned error_count = 0; | |
1167 unsigned d = 0; | |
1168 const char* padding = " "; | |
1169 DCHECK_GE(strlen(padding), lane_len_in_hex + 1); | |
1170 for (unsigned n = 0; n < inputs_n_length; n++) { | |
1171 for (unsigned m = 0; m < inputs_m_length; m++, d++) { | |
1172 bool error_in_vector = false; | |
1173 | |
1174 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1175 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
1176 (m * vd_lane_count) + lane; | |
1177 | |
1178 if (results[output_index] != expected[output_index]) { | |
1179 error_in_vector = true; | |
1180 break; | |
1181 } | |
1182 } | |
1183 | |
1184 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
1185 printf("%s\n", name); | |
1186 printf(" Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", lane_len_in_hex + 1, | |
1187 padding, lane_len_in_hex, padding, lane_len_in_hex + 1, | |
1188 padding); | |
1189 | |
1190 const unsigned first_index_n = | |
1191 inputs_n_length - (16 / vn_lane_bytes) + n + 1; | |
1192 | |
1193 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count); | |
1194 lane++) { | |
1195 unsigned output_index = (n * inputs_m_length * vd_lane_count) + | |
1196 (m * vd_lane_count) + lane; | |
1197 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; | |
1198 unsigned input_index_m = m; | |
1199 | |
1200 printf( | |
1201 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
1202 " " | |
1203 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
1204 results[output_index] != expected[output_index] ? '*' : ' ', | |
1205 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]), | |
1206 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]), | |
1207 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
1208 lane_len_in_hex, static_cast<uint64_t>(expected[output_index])); | |
1209 } | |
1210 } | |
1211 } | |
1212 } | |
1213 DCHECK_EQ(d, expected_length); | |
1214 if (error_count > kErrorReportLimit) { | |
1215 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
1216 } | |
1217 CHECK(error_count == 0); | |
1218 } | |
1219 delete[] results; | |
1220 } | |
1221 | |
1222 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ==== | |
1223 | |
1224 void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper, | |
1225 uintptr_t inputs_d, const int inputs_imm1[], | |
1226 unsigned inputs_imm1_length, uintptr_t inputs_n, | |
1227 unsigned inputs_n_length, | |
1228 const int inputs_imm2[], | |
1229 unsigned inputs_imm2_length, uintptr_t results, | |
1230 VectorFormat vd_form, VectorFormat vn_form) { | |
1231 DCHECK_NE(vd_form, kFormatUndefined); | |
1232 DCHECK_NE(vn_form, kFormatUndefined); | |
1233 | |
1234 SETUP(); | |
1235 START(); | |
1236 | |
1237 // Roll up the loop to keep the code size down. | |
1238 Label loop_n; | |
1239 | |
1240 Register out = x0; | |
1241 Register inputs_d_base = x1; | |
1242 Register inputs_n_base = x2; | |
1243 Register inputs_n_last_vector = x4; | |
1244 Register index_n = x6; | |
1245 | |
1246 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form); | |
1247 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
1248 | |
1249 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form); | |
1250 const unsigned vn_lane_count = LaneCountFromFormat(vn_form); | |
1251 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form); | |
1252 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form); | |
1253 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form); | |
1254 | |
1255 // These will be either a D- or a Q-register form, with a single lane | |
1256 // (for use in scalar load and store operations). | |
1257 VRegister vd = VRegister::Create(0, vd_bits); | |
1258 VRegister vn = VRegister::Create(1, vn_bits); | |
1259 VRegister vntmp = VRegister::Create(4, vn_bits); | |
1260 VRegister vres = VRegister::Create(5, vn_bits); | |
1261 | |
1262 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count); | |
1263 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count); | |
1264 | |
1265 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'. | |
1266 VRegister vntmp_single = VRegister::Create(4, vn_lane_bits); | |
1267 | |
1268 // Same registers for use in the 'ext' instructions. | |
1269 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B(); | |
1270 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B(); | |
1271 | |
1272 __ Mov(out, results); | |
1273 | |
1274 __ Mov(inputs_d_base, inputs_d); | |
1275 | |
1276 __ Mov(inputs_n_base, inputs_n); | |
1277 __ Mov(inputs_n_last_vector, | |
1278 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count)); | |
1279 | |
1280 __ Ldr(vd, MemOperand(inputs_d_base)); | |
1281 | |
1282 __ Ldr(vn, MemOperand(inputs_n_last_vector)); | |
1283 | |
1284 __ Mov(index_n, 0); | |
1285 __ Bind(&loop_n); | |
1286 | |
1287 __ Ldr(vntmp_single, | |
1288 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2)); | |
1289 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes); | |
1290 | |
1291 for (unsigned i = 0; i < inputs_imm1_length; i++) { | |
1292 for (unsigned j = 0; j < inputs_imm2_length; j++) { | |
1293 __ Mov(vres, vd); | |
1294 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]); | |
1295 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex)); | |
1296 } | |
1297 } | |
1298 | |
1299 __ Add(index_n, index_n, 1); | |
1300 __ Cmp(index_n, inputs_n_length); | |
1301 __ B(lo, &loop_n); | |
1302 | |
1303 END(); | |
1304 RUN(); | |
1305 TEARDOWN(); | |
1306 } | |
1307 | |
1308 // Test NEON instructions. The inputs_*[] and expected[] arrays should be | |
1309 // arrays of rawbit representation of input values. This ensures that | |
1310 // exact bit comparisons can be performed. | |
1311 template <typename Td, typename Tn> | |
1312 void TestOpImmOpImmNEON(const char* name, | |
1313 TestOpImmOpImmVdUpdateNEONHelper_t helper, | |
1314 const Td inputs_d[], const int inputs_imm1[], | |
1315 unsigned inputs_imm1_length, const Tn inputs_n[], | |
1316 unsigned inputs_n_length, const int inputs_imm2[], | |
1317 unsigned inputs_imm2_length, const Td expected[], | |
1318 unsigned expected_length, VectorFormat vd_form, | |
1319 VectorFormat vn_form) { | |
1320 DCHECK_GT(inputs_n_length, 0U); | |
1321 DCHECK_GT(inputs_imm1_length, 0U); | |
1322 DCHECK_GT(inputs_imm2_length, 0U); | |
1323 | |
1324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form); | |
1325 | |
1326 const unsigned results_length = | |
1327 inputs_n_length * inputs_imm1_length * inputs_imm2_length; | |
1328 | |
1329 Td* results = new Td[results_length * vd_lane_count]; | |
1330 const unsigned lane_bit = sizeof(Td) * 8; | |
1331 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>(); | |
1332 | |
1333 TestOpImmOpImmNEON_Helper( | |
1334 helper, reinterpret_cast<uintptr_t>(inputs_d), inputs_imm1, | |
1335 inputs_imm1_length, reinterpret_cast<uintptr_t>(inputs_n), | |
1336 inputs_n_length, inputs_imm2, inputs_imm2_length, | |
1337 reinterpret_cast<uintptr_t>(results), vd_form, vn_form); | |
1338 | |
1339 if (CcTest::sim_test_trace()) { | |
1340 // Print the results. | |
1341 printf("const uint%u_t kExpected_NEON_%s[] = {\n", lane_bit, name); | |
1342 for (unsigned iteration = 0; iteration < results_length; iteration++) { | |
1343 printf(" "); | |
1344 // Output a separate result for each element of the result vector. | |
1345 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1346 unsigned index = lane + (iteration * vd_lane_count); | |
1347 printf(" 0x%0*" PRIx64 ",", lane_len_in_hex, | |
1348 static_cast<uint64_t>(results[index])); | |
1349 } | |
1350 printf("\n"); | |
1351 } | |
1352 | |
1353 printf("};\n"); | |
1354 printf("const unsigned kExpectedCount_NEON_%s = %u;\n", name, | |
1355 results_length); | |
1356 } else { | |
1357 // Check the results. | |
1358 CHECK(expected_length == results_length); | |
1359 unsigned error_count = 0; | |
1360 unsigned counted_length = 0; | |
1361 const char* padding = " "; | |
1362 DCHECK(strlen(padding) >= (lane_len_in_hex + 1)); | |
1363 for (unsigned n = 0; n < inputs_n_length; n++) { | |
1364 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) { | |
1365 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) { | |
1366 bool error_in_vector = false; | |
1367 | |
1368 counted_length++; | |
1369 | |
1370 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1371 unsigned output_index = | |
1372 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) + | |
1373 (imm1 * inputs_imm2_length * vd_lane_count) + | |
1374 (imm2 * vd_lane_count) + lane; | |
1375 | |
1376 if (results[output_index] != expected[output_index]) { | |
1377 error_in_vector = true; | |
1378 break; | |
1379 } | |
1380 } | |
1381 | |
1382 if (error_in_vector && (++error_count <= kErrorReportLimit)) { | |
1383 printf("%s\n", name); | |
1384 printf(" Vd%.*s| Imm%.*s| Vn%.*s| Imm%.*s| Vd%.*s| Expected\n", | |
1385 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, | |
1386 lane_len_in_hex + 1, padding, lane_len_in_hex, padding, | |
1387 lane_len_in_hex + 1, padding); | |
1388 | |
1389 for (unsigned lane = 0; lane < vd_lane_count; lane++) { | |
1390 unsigned output_index = | |
1391 (n * inputs_imm1_length * inputs_imm2_length * | |
1392 vd_lane_count) + | |
1393 (imm1 * inputs_imm2_length * vd_lane_count) + | |
1394 (imm2 * vd_lane_count) + lane; | |
1395 unsigned input_index_n = | |
1396 (inputs_n_length - vd_lane_count + n + 1 + lane) % | |
1397 inputs_n_length; | |
1398 unsigned input_index_imm1 = imm1; | |
1399 unsigned input_index_imm2 = imm2; | |
1400 | |
1401 printf( | |
1402 "%c0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 | |
1403 " " | |
1404 "| 0x%0*" PRIx64 " | 0x%0*" PRIx64 " | 0x%0*" PRIx64 "\n", | |
1405 results[output_index] != expected[output_index] ? '*' : ' ', | |
1406 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]), | |
1407 lane_len_in_hex, | |
1408 static_cast<uint64_t>(inputs_imm1[input_index_imm1]), | |
1409 lane_len_in_hex, | |
1410 static_cast<uint64_t>(inputs_n[input_index_n]), | |
1411 lane_len_in_hex, | |
1412 static_cast<uint64_t>(inputs_imm2[input_index_imm2]), | |
1413 lane_len_in_hex, static_cast<uint64_t>(results[output_index]), | |
1414 lane_len_in_hex, | |
1415 static_cast<uint64_t>(expected[output_index])); | |
1416 } | |
1417 } | |
1418 } | |
1419 } | |
1420 } | |
1421 DCHECK_EQ(counted_length, expected_length); | |
1422 if (error_count > kErrorReportLimit) { | |
1423 printf("%u other errors follow.\n", error_count - kErrorReportLimit); | |
1424 } | |
1425 CHECK(error_count == 0); | |
1426 } | |
1427 delete[] results; | |
1428 } | |
1429 | |
1430 } // anonymous namespace | |
1431 | |
1432 // ==== NEON Tests. ==== | |
1433 | |
// Calls Test1OpNEON for MacroAssembler::<mnemonic>. The test is named
// "<mnemonic>_<vdform>", the input length is taken from the array itself, and
// the expected table and its count are kExpected[Count]_NEON_<mnemonic>_<vdform>.
#define CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) \
  Test1OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),             \
              &MacroAssembler::mnemonic,                             \
              input_n,                                               \
              (sizeof(input_n) / sizeof(input_n[0])),                \
              kExpected_NEON_##mnemonic##_##vdform,                  \
              kExpectedCount_NEON_##mnemonic##_##vdform,             \
              kFormat##vdform,                                       \
              kFormat##vnform)
1441 | |
// Calls Test1OpAcrossNEON for MacroAssembler::<mnemonic>. The test name and
// the expected-table identifiers carry both forms:
// "<mnemonic>_<vdform>_<vnform>".
#define CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vdform, vnform, input_n) \
  Test1OpAcrossNEON(                                                       \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(vnform),     \
      &MacroAssembler::mnemonic,                                           \
      input_n,                                                             \
      (sizeof(input_n) / sizeof(input_n[0])),                              \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform,                     \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform,                \
      kFormat##vdform,                                                     \
      kFormat##vnform)
1450 | |
// Calls Test2OpNEON for MacroAssembler::<mnemonic>. The test is named
// "<mnemonic>_<vdform>"; the lengths of 'input_n' and 'input_m' are taken
// from the arrays themselves.
#define CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, input_d, \
                                  input_n, input_m)                          \
  Test2OpNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),                     \
              &MacroAssembler::mnemonic,                                     \
              input_d,                                                       \
              input_n,                                                       \
              (sizeof(input_n) / sizeof(input_n[0])),                        \
              input_m,                                                       \
              (sizeof(input_m) / sizeof(input_m[0])),                        \
              kExpected_NEON_##mnemonic##_##vdform,                          \
              kExpectedCount_NEON_##mnemonic##_##vdform,                     \
              kFormat##vdform,                                               \
              kFormat##vnform,                                               \
              kFormat##vmform)
1460 | |
// Calls Test2OpImmNEON for MacroAssembler::<mnemonic>. The test name and the
// expected-table identifiers carry a "_2OPIMM" suffix:
// "<mnemonic>_<vdform>_2OPIMM".
#define CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, input_n, \
                                     input_m)                           \
  Test2OpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_2OPIMM",   \
                 &MacroAssembler::mnemonic,                             \
                 input_n,                                               \
                 (sizeof(input_n) / sizeof(input_n[0])),                \
                 input_m,                                               \
                 (sizeof(input_m) / sizeof(input_m[0])),                \
                 kExpected_NEON_##mnemonic##_##vdform##_2OPIMM,         \
                 kExpectedCount_NEON_##mnemonic##_##vdform##_2OPIMM,    \
                 kFormat##vdform,                                       \
                 kFormat##vnform)
1470 | |
// Calls TestByElementNEON for MacroAssembler::<mnemonic>. The test name and
// the expected-table identifiers carry all three forms:
// "<mnemonic>_<vdform>_<vnform>_<vmform>". The lengths of 'input_n',
// 'input_m' and 'indices' are taken from the arrays themselves.
#define CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform,  \
                                        input_d, input_n, input_m,         \
                                        indices)                           \
  TestByElementNEON(                                                       \
      STRINGIFY(mnemonic) "_" STRINGIFY(vdform) "_" STRINGIFY(             \
          vnform) "_" STRINGIFY(vmform),                                   \
      &MacroAssembler::mnemonic,                                           \
      input_d,                                                             \
      input_n,                                                             \
      (sizeof(input_n) / sizeof(input_n[0])),                              \
      input_m,                                                             \
      (sizeof(input_m) / sizeof(input_m[0])),                              \
      indices,                                                             \
      (sizeof(indices) / sizeof(indices[0])),                              \
      kExpected_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,          \
      kExpectedCount_NEON_##mnemonic##_##vdform##_##vnform##_##vmform,     \
      kFormat##vdform,                                                     \
      kFormat##vnform,                                                     \
      kFormat##vmform)
1483 | |
// Calls TestOpImmOpImmNEON with an explicitly supplied 'helper' (a
// member-function pointer). 'mnemonic' is used only to build the test name
// "<mnemonic>_<vdform>" and the expected-table identifiers.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(helper, mnemonic, vdform, vnform, \
                                         input_d, input_imm1, input_n,     \
                                         input_imm2)                       \
  TestOpImmOpImmNEON(STRINGIFY(mnemonic) "_" STRINGIFY(vdform),            \
                     helper,                                               \
                     input_d,                                              \
                     input_imm1,                                           \
                     (sizeof(input_imm1) / sizeof(input_imm1[0])),         \
                     input_n,                                              \
                     (sizeof(input_n) / sizeof(input_n[0])),               \
                     input_imm2,                                           \
                     (sizeof(input_imm2) / sizeof(input_imm2[0])),         \
                     kExpected_NEON_##mnemonic##_##vdform,                 \
                     kExpectedCount_NEON_##mnemonic##_##vdform,            \
                     kFormat##vdform,                                      \
                     kFormat##vnform)
1495 | |
// One-operand test in which source and destination share the same format:
// forwards to CALL_TEST_NEON_HELPER_1Op with 'variant' used for both forms.
#define CALL_TEST_NEON_HELPER_2SAME(mnemonic, variant, input)       \
  CALL_TEST_NEON_HELPER_1Op(mnemonic, /* vdform */ variant,         \
                            /* vnform */ variant, input)
1498 | |
// Defines SIMTESTs <mnemonic>_8B and <mnemonic>_16B, both driven by the
// kInput8bits<input> array.
#define DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)               \
  SIMTEST(mnemonic##_8B) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8B, kInput8bits##input);   \
  }                                                                  \
  SIMTEST(mnemonic##_16B) {                                          \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 16B, kInput8bits##input);  \
  }
1506 | |
// Defines SIMTESTs <mnemonic>_4H and <mnemonic>_8H, both driven by the
// kInput16bits<input> array.
#define DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)                \
  SIMTEST(mnemonic##_4H) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4H, kInput16bits##input);  \
  }                                                                  \
  SIMTEST(mnemonic##_8H) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 8H, kInput16bits##input);  \
  }
1514 | |
// Defines SIMTESTs <mnemonic>_2S and <mnemonic>_4S, both driven by the
// kInput32bits<input> array.
#define DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                \
  SIMTEST(mnemonic##_2S) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInput32bits##input);  \
  }                                                                  \
  SIMTEST(mnemonic##_4S) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInput32bits##input);  \
  }
1522 | |
// Byte and halfword vector variants: 8B, 16B, 4H and 8H.
#define DEFINE_TEST_NEON_2SAME_BH(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_8B_16B(mnemonic, input)   \
  DEFINE_TEST_NEON_2SAME_4H_8H(mnemonic, input)

// Every vector variant except 2D: the BH set plus 2S and 4S.
#define DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input) \
  DEFINE_TEST_NEON_2SAME_BH(mnemonic, input)         \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)
1530 | |
// All vector variants: the NO2D set plus <mnemonic>_2D, which is driven by
// the kInput64bits<input> array.
#define DEFINE_TEST_NEON_2SAME(mnemonic, input)                      \
  DEFINE_TEST_NEON_2SAME_NO2D(mnemonic, input)                       \
  SIMTEST(mnemonic##_2D) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);  \
  }
// Word and doubleword vector variants only: 2S, 4S and 2D (the latter driven
// by the kInput64bits<input> array).
#define DEFINE_TEST_NEON_2SAME_SD(mnemonic, input)                   \
  DEFINE_TEST_NEON_2SAME_2S_4S(mnemonic, input)                      \
  SIMTEST(mnemonic##_2D) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInput64bits##input);  \
  }
1541 | |
// Floating-point vector variants: 2S and 4S use kInputFloat<input>, 2D uses
// kInputDouble<input>.
#define DEFINE_TEST_NEON_2SAME_FP(mnemonic, input)                   \
  SIMTEST(mnemonic##_2S) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2S, kInputFloat##input);   \
  }                                                                  \
  SIMTEST(mnemonic##_4S) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 4S, kInputFloat##input);   \
  }                                                                  \
  SIMTEST(mnemonic##_2D) {                                           \
    CALL_TEST_NEON_HELPER_2SAME(mnemonic, 2D, kInputDouble##input);  \
  }
1552 | |
// Scalar floating-point two-same tests. Defines <mnemonic>_S (form S, input
// table kInputFloat<input>) and <mnemonic>_D (form D, kInputDouble<input>).
1553 #define DEFINE_TEST_NEON_2SAME_FP_SCALAR(mnemonic, input) \ | |
1554 SIMTEST(mnemonic##_S) { \ | |
1555 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInputFloat##input); \ | |
1556 } \ | |
1557 SIMTEST(mnemonic##_D) { \ | |
1558 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInputDouble##input); \ | |
1559 } | |
1560 | |
// Defines the single SIMTEST <mnemonic>_B: CALL_TEST_NEON_HELPER_2SAME with
// form B and the input table kInput8bits<input>.
1561 #define DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ | |
1562 SIMTEST(mnemonic##_B) { \ | |
1563 CALL_TEST_NEON_HELPER_2SAME(mnemonic, B, kInput8bits##input); \ | |
1564 } | |
// Defines the single SIMTEST <mnemonic>_H: CALL_TEST_NEON_HELPER_2SAME with
// form H and the input table kInput16bits<input>.
1565 #define DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ | |
1566 SIMTEST(mnemonic##_H) { \ | |
1567 CALL_TEST_NEON_HELPER_2SAME(mnemonic, H, kInput16bits##input); \ | |
1568 } | |
// Defines the single SIMTEST <mnemonic>_S: CALL_TEST_NEON_HELPER_2SAME with
// form S and the input table kInput32bits<input>.
1569 #define DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ | |
1570 SIMTEST(mnemonic##_S) { \ | |
1571 CALL_TEST_NEON_HELPER_2SAME(mnemonic, S, kInput32bits##input); \ | |
1572 } | |
// Defines the single SIMTEST <mnemonic>_D: CALL_TEST_NEON_HELPER_2SAME with
// form D and the input table kInput64bits<input>.
1573 #define DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) \ | |
1574 SIMTEST(mnemonic##_D) { \ | |
1575 CALL_TEST_NEON_HELPER_2SAME(mnemonic, D, kInput64bits##input); \ | |
1576 } | |
1577 | |
// Composite: expands the _SCALAR_B, _SCALAR_H, _SCALAR_S and _SCALAR_D
// two-same macros in that order, producing <mnemonic>_B/_H/_S/_D.
1578 #define DEFINE_TEST_NEON_2SAME_SCALAR(mnemonic, input) \ | |
1579 DEFINE_TEST_NEON_2SAME_SCALAR_B(mnemonic, input) \ | |
1580 DEFINE_TEST_NEON_2SAME_SCALAR_H(mnemonic, input) \ | |
1581 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ | |
1582 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) | |
1583 | |
// Composite: expands only the _SCALAR_S and _SCALAR_D two-same macros,
// producing <mnemonic>_S and <mnemonic>_D.
1584 #define DEFINE_TEST_NEON_2SAME_SCALAR_SD(mnemonic, input) \ | |
1585 DEFINE_TEST_NEON_2SAME_SCALAR_S(mnemonic, input) \ | |
1586 DEFINE_TEST_NEON_2SAME_SCALAR_D(mnemonic, input) | |
1587 | |
// Thin alias: forwards all four arguments unchanged to
// CALL_TEST_NEON_HELPER_1OpAcross.
1588 #define CALL_TEST_NEON_HELPER_ACROSS(mnemonic, vd_form, vn_form, input_n) \ | |
1589 CALL_TEST_NEON_HELPER_1OpAcross(mnemonic, vd_form, vn_form, input_n) | |
1590 | |
// Defines five SIMTESTs named <mnemonic>_<vd>_<vn>: B_8B, B_16B, H_4H, H_8H and
// S_4S. Each passes the first form as vd_form and the second as vn_form to
// CALL_TEST_NEON_HELPER_ACROSS. The input table is kInput8bits<input> for the
// 8B/16B sources, kInput16bits<input> for 4H/8H and kInput32bits<input> for 4S.
1591 #define DEFINE_TEST_NEON_ACROSS(mnemonic, input) \ | |
1592 SIMTEST(mnemonic##_B_8B) { \ | |
1593 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 8B, kInput8bits##input); \ | |
1594 } \ | |
1595 SIMTEST(mnemonic##_B_16B) { \ | |
1596 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, B, 16B, kInput8bits##input); \ | |
1597 } \ | |
1598 SIMTEST(mnemonic##_H_4H) { \ | |
1599 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 4H, kInput16bits##input); \ | |
1600 } \ | |
1601 SIMTEST(mnemonic##_H_8H) { \ | |
1602 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8H, kInput16bits##input); \ | |
1603 } \ | |
1604 SIMTEST(mnemonic##_S_4S) { \ | |
1605 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInput32bits##input); \ | |
1606 } | |
1607 | |
// Like DEFINE_TEST_NEON_ACROSS, but the vd_form is one step larger than the
// source element: defines <mnemonic>_H_8B, _H_16B, _S_4H, _S_8H and _D_4S.
// The input table still follows the source form (kInput8bits/16bits/32bits).
1608 #define DEFINE_TEST_NEON_ACROSS_LONG(mnemonic, input) \ | |
1609 SIMTEST(mnemonic##_H_8B) { \ | |
1610 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 8B, kInput8bits##input); \ | |
1611 } \ | |
1612 SIMTEST(mnemonic##_H_16B) { \ | |
1613 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, H, 16B, kInput8bits##input); \ | |
1614 } \ | |
1615 SIMTEST(mnemonic##_S_4H) { \ | |
1616 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4H, kInput16bits##input); \ | |
1617 } \ | |
1618 SIMTEST(mnemonic##_S_8H) { \ | |
1619 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 8H, kInput16bits##input); \ | |
1620 } \ | |
1621 SIMTEST(mnemonic##_D_4S) { \ | |
1622 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, D, 4S, kInput32bits##input); \ | |
1623 } | |
1624 | |
// Floating-point across test: defines only <mnemonic>_S_4S, invoking
// CALL_TEST_NEON_HELPER_ACROSS with forms (S, 4S) and kInputFloat<input>.
1625 #define DEFINE_TEST_NEON_ACROSS_FP(mnemonic, input) \ | |
1626 SIMTEST(mnemonic##_S_4S) { \ | |
1627 CALL_TEST_NEON_HELPER_ACROSS(mnemonic, S, 4S, kInputFloat##input); \ | |
1628 } | |
1629 | |
// Thin alias: forwards all four arguments unchanged to
// CALL_TEST_NEON_HELPER_1Op. Used when the destination and source forms
// differ.
1630 #define CALL_TEST_NEON_HELPER_2DIFF(mnemonic, vdform, vnform, input_n) \ | |
1631 CALL_TEST_NEON_HELPER_1Op(mnemonic, vdform, vnform, input_n) | |
1632 | |
// Defines six SIMTESTs named after the destination form. The (vdform, vnform)
// pairs are (4H, 8B), (8H, 16B), (2S, 4H), (4S, 8H), (1D, 2S) and (2D, 4S).
// The input table is picked to match the source form: kInput8bits<input>,
// kInput16bits<input> or kInput32bits<input>.
1633 #define DEFINE_TEST_NEON_2DIFF_LONG(mnemonic, input) \ | |
1634 SIMTEST(mnemonic##_4H) { \ | |
1635 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 8B, kInput8bits##input); \ | |
1636 } \ | |
1637 SIMTEST(mnemonic##_8H) { \ | |
1638 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8H, 16B, kInput8bits##input); \ | |
1639 } \ | |
1640 SIMTEST(mnemonic##_2S) { \ | |
1641 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 4H, kInput16bits##input); \ | |
1642 } \ | |
1643 SIMTEST(mnemonic##_4S) { \ | |
1644 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 8H, kInput16bits##input); \ | |
1645 } \ | |
1646 SIMTEST(mnemonic##_1D) { \ | |
1647 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 1D, 2S, kInput32bits##input); \ | |
1648 } \ | |
1649 SIMTEST(mnemonic##_2D) { \ | |
1650 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 4S, kInput32bits##input); \ | |
1651 } | |
1652 | |
// Defines six SIMTESTs named after the destination form. Three use the plain
// mnemonic with (vdform, vnform) = (8B, 8H), (4H, 4S), (2S, 2D). Three use
// mnemonic##2 (the mnemonic with a literal "2" pasted on, for both the test
// name and the helper argument) with (16B, 8H), (8H, 4S), (4S, 2D).
// The input table matches the source form (kInput16bits/32bits/64bits).
1653 #define DEFINE_TEST_NEON_2DIFF_NARROW(mnemonic, input) \ | |
1654 SIMTEST(mnemonic##_8B) { \ | |
1655 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 8B, 8H, kInput16bits##input); \ | |
1656 } \ | |
1657 SIMTEST(mnemonic##_4H) { \ | |
1658 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInput32bits##input); \ | |
1659 } \ | |
1660 SIMTEST(mnemonic##_2S) { \ | |
1661 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInput64bits##input); \ | |
1662 } \ | |
1663 SIMTEST(mnemonic##2_16B) { \ | |
1664 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 16B, 8H, kInput16bits##input); \ | |
1665 } \ | |
1666 SIMTEST(mnemonic##2_8H) { \ | |
1667 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInput32bits##input); \ | |
1668 } \ | |
1669 SIMTEST(mnemonic##2_4S) { \ | |
1670 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInput64bits##input); \ | |
1671 } | |
1672 | |
// Floating-point tests with differing destination/source forms. Defines
// <mnemonic>_4S (4S, 4H) and <mnemonic>2_4S (4S, 8H) with the input table
// kInputFloat16<input>, and <mnemonic>_2D (2D, 2S) and <mnemonic>2_2D (2D, 4S)
// with kInputFloat<input>. The "2" tests paste a literal "2" onto the mnemonic.
1673 #define DEFINE_TEST_NEON_2DIFF_FP_LONG(mnemonic, input) \ | |
1674 SIMTEST(mnemonic##_4S) { \ | |
1675 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4S, 4H, kInputFloat16##input); \ | |
1676 } \ | |
1677 SIMTEST(mnemonic##_2D) { \ | |
1678 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2D, 2S, kInputFloat##input); \ | |
1679 } \ | |
1680 SIMTEST(mnemonic##2_4S) { \ | |
1681 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 8H, kInputFloat16##input); \ | |
1682 } \ | |
1683 SIMTEST(mnemonic##2_2D) { \ | |
1684 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 2D, 4S, kInputFloat##input); \ | |
1685 } | |
1686 | |
// Floating-point tests with differing destination/source forms. Defines
// <mnemonic>_4H (4H, 4S) and <mnemonic>2_8H (8H, 4S) with the input table
// kInputFloat<input>, and <mnemonic>_2S (2S, 2D) and <mnemonic>2_4S (4S, 2D)
// with kInputDouble<input>. The "2" tests paste a literal "2" onto the mnemonic.
1687 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW(mnemonic, input) \ | |
1688 SIMTEST(mnemonic##_4H) { \ | |
1689 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 4H, 4S, kInputFloat##input); \ | |
1690 } \ | |
1691 SIMTEST(mnemonic##_2S) { \ | |
1692 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ | |
1693 } \ | |
1694 SIMTEST(mnemonic##2_8H) { \ | |
1695 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 8H, 4S, kInputFloat##input); \ | |
1696 } \ | |
1697 SIMTEST(mnemonic##2_4S) { \ | |
1698 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ | |
1699 } | |
1700 | |
// Subset of DEFINE_TEST_NEON_2DIFF_FP_NARROW: defines only <mnemonic>_2S
// (2S, 2D) and <mnemonic>2_4S (4S, 2D), both with kInputDouble<input>.
1701 #define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(mnemonic, input) \ | |
1702 SIMTEST(mnemonic##_2S) { \ | |
1703 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, 2S, 2D, kInputDouble##input); \ | |
1704 } \ | |
1705 SIMTEST(mnemonic##2_4S) { \ | |
1706 CALL_TEST_NEON_HELPER_2DIFF(mnemonic##2, 4S, 2D, kInputDouble##input); \ | |
1707 } | |
1708 | |
// Scalar tests with differing destination/source forms. Defines <mnemonic>_B
// (B, H) with kInput16bits<input>, <mnemonic>_H (H, S) with
// kInput32bits<input> and <mnemonic>_S (S, D) with kInput64bits<input>.
1709 #define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(mnemonic, input) \ | |
1710 SIMTEST(mnemonic##_B) { \ | |
1711 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, B, H, kInput16bits##input); \ | |
1712 } \ | |
1713 SIMTEST(mnemonic##_H) { \ | |
1714 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, H, S, kInput32bits##input); \ | |
1715 } \ | |
1716 SIMTEST(mnemonic##_S) { \ | |
1717 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, D, kInput64bits##input); \ | |
1718 } | |
1719 | |
// Floating-point tests with a scalar destination form and a two-element source
// form. Defines <mnemonic>_S (S, 2S) with kInputFloat<input> and <mnemonic>_D
// (D, 2D) with kInputDouble<input>.
1720 #define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(mnemonic, input) \ | |
1721 SIMTEST(mnemonic##_S) { \ | |
1722 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, S, 2S, kInputFloat##input); \ | |
1723 } \ | |
1724 SIMTEST(mnemonic##_D) { \ | |
1725 CALL_TEST_NEON_HELPER_2DIFF(mnemonic, D, 2D, kInputDouble##input); \ | |
1726 } | |
1727 | |
// Wraps CALL_TEST_NEON_HELPER_2Op in a braced block for the case where all
// three form arguments are the same `variant` and both source inputs are the
// same table `input_nm`. `input_d` is forwarded as the destination input.
1728 #define CALL_TEST_NEON_HELPER_3SAME(mnemonic, variant, input_d, input_nm) \ | |
1729 { \ | |
1730 CALL_TEST_NEON_HELPER_2Op(mnemonic, variant, variant, variant, input_d, \ | |
1731 input_nm, input_nm); \ | |
1732 } | |
1733 | |
// Defines the SIMTESTs <mnemonic>_8B and <mnemonic>_16B. Each invokes
// CALL_TEST_NEON_HELPER_3SAME with the matching form, kInput8bitsAccDestination
// as input_d and kInput8bits<input> as input_nm.
1734 #define DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ | |
1735 SIMTEST(mnemonic##_8B) { \ | |
1736 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8B, kInput8bitsAccDestination, \ | |
1737 kInput8bits##input); \ | |
1738 } \ | |
1739 SIMTEST(mnemonic##_16B) { \ | |
1740 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 16B, kInput8bitsAccDestination, \ | |
1741 kInput8bits##input); \ | |
1742 } | |
1743 | |
// Defines the SIMTESTs <mnemonic>_4H, _8H, _2S and _4S via
// CALL_TEST_NEON_HELPER_3SAME. The H forms use kInput16bitsAccDestination and
// kInput16bits<input>; the S forms use kInput32bitsAccDestination and
// kInput32bits<input>.
1744 #define DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) \ | |
1745 SIMTEST(mnemonic##_4H) { \ | |
1746 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4H, kInput16bitsAccDestination, \ | |
1747 kInput16bits##input); \ | |
1748 } \ | |
1749 SIMTEST(mnemonic##_8H) { \ | |
1750 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 8H, kInput16bitsAccDestination, \ | |
1751 kInput16bits##input); \ | |
1752 } \ | |
1753 SIMTEST(mnemonic##_2S) { \ | |
1754 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInput32bitsAccDestination, \ | |
1755 kInput32bits##input); \ | |
1756 } \ | |
1757 SIMTEST(mnemonic##_4S) { \ | |
1758 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInput32bitsAccDestination, \ | |
1759 kInput32bits##input); \ | |
1760 } | |
1761 | |
// Composite: expands DEFINE_TEST_NEON_3SAME_8B_16B followed by
// DEFINE_TEST_NEON_3SAME_HS, i.e. the _8B, _16B, _4H, _8H, _2S and _4S tests
// but no _2D test.
1762 #define DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ | |
1763 DEFINE_TEST_NEON_3SAME_8B_16B(mnemonic, input) \ | |
1764 DEFINE_TEST_NEON_3SAME_HS(mnemonic, input) | |
1765 | |
// Expands DEFINE_TEST_NEON_3SAME_NO2D and additionally defines the SIMTEST
// <mnemonic>_2D, which uses form 2D with kInput64bitsAccDestination and
// kInput64bits<input>.
1766 #define DEFINE_TEST_NEON_3SAME(mnemonic, input) \ | |
1767 DEFINE_TEST_NEON_3SAME_NO2D(mnemonic, input) \ | |
1768 SIMTEST(mnemonic##_2D) { \ | |
1769 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInput64bitsAccDestination, \ | |
1770 kInput64bits##input); \ | |
1771 } | |
1772 | |
// Floating-point three-same tests. Defines <mnemonic>_2S and <mnemonic>_4S
// with kInputFloatAccDestination / kInputFloat<input>, and <mnemonic>_2D with
// kInputDoubleAccDestination / kInputDouble<input>.
1773 #define DEFINE_TEST_NEON_3SAME_FP(mnemonic, input) \ | |
1774 SIMTEST(mnemonic##_2S) { \ | |
1775 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2S, kInputFloatAccDestination, \ | |
1776 kInputFloat##input); \ | |
1777 } \ | |
1778 SIMTEST(mnemonic##_4S) { \ | |
1779 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 4S, kInputFloatAccDestination, \ | |
1780 kInputFloat##input); \ | |
1781 } \ | |
1782 SIMTEST(mnemonic##_2D) { \ | |
1783 CALL_TEST_NEON_HELPER_3SAME(mnemonic, 2D, kInputDoubleAccDestination, \ | |
1784 kInputDouble##input); \ | |
1785 } | |
1786 | |
// Defines the single SIMTEST <mnemonic>_D: CALL_TEST_NEON_HELPER_3SAME with
// form D, kInput64bitsAccDestination and kInput64bits<input>.
1787 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \ | |
1788 SIMTEST(mnemonic##_D) { \ | |
1789 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \ | |
1790 kInput64bits##input); \ | |
1791 } | |
1792 | |
// Defines the SIMTESTs <mnemonic>_H (form H, 16-bit input tables) and
// <mnemonic>_S (form S, 32-bit input tables) via CALL_TEST_NEON_HELPER_3SAME.
1793 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \ | |
1794 SIMTEST(mnemonic##_H) { \ | |
1795 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \ | |
1796 kInput16bits##input); \ | |
1797 } \ | |
1798 SIMTEST(mnemonic##_S) { \ | |
1799 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \ | |
1800 kInput32bits##input); \ | |
1801 } | |
1802 | |
// Defines the SIMTESTs <mnemonic>_B, _H, _S and _D via
// CALL_TEST_NEON_HELPER_3SAME. Each uses the kInput<N>bitsAccDestination and
// kInput<N>bits<input> tables with N = 8, 16, 32 and 64 respectively.
1803 #define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \ | |
1804 SIMTEST(mnemonic##_B) { \ | |
1805 CALL_TEST_NEON_HELPER_3SAME(mnemonic, B, kInput8bitsAccDestination, \ | |
1806 kInput8bits##input); \ | |
1807 } \ | |
1808 SIMTEST(mnemonic##_H) { \ | |
1809 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \ | |
1810 kInput16bits##input); \ | |
1811 } \ | |
1812 SIMTEST(mnemonic##_S) { \ | |
1813 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \ | |
1814 kInput32bits##input); \ | |
1815 } \ | |
1816 SIMTEST(mnemonic##_D) { \ | |
1817 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \ | |
1818 kInput64bits##input); \ | |
1819 } | |
1820 | |
// Scalar floating-point three-same tests. Defines <mnemonic>_S with
// kInputFloatAccDestination / kInputFloat<input> and <mnemonic>_D with
// kInputDoubleAccDestination / kInputDouble<input>.
1821 #define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \ | |
1822 SIMTEST(mnemonic##_S) { \ | |
1823 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInputFloatAccDestination, \ | |
1824 kInputFloat##input); \ | |
1825 } \ | |
1826 SIMTEST(mnemonic##_D) { \ | |
1827 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInputDoubleAccDestination, \ | |
1828 kInputDouble##input); \ | |
1829 } | |
1830 | |
// Wraps CALL_TEST_NEON_HELPER_2Op in a braced block, forwarding the three form
// arguments and the three input tables unchanged. Used when destination and
// source forms differ.
1831 #define CALL_TEST_NEON_HELPER_3DIFF(mnemonic, vdform, vnform, vmform, input_d, \ | |
1832 input_n, input_m) \ | |
1833 { \ | |
1834 CALL_TEST_NEON_HELPER_2Op(mnemonic, vdform, vnform, vmform, input_d, \ | |
1835 input_n, input_m); \ | |
1836 } | |
1837 | |
// Defines <mnemonic>_8H with forms (8H, 8B, 8B) and <mnemonic>2_8H with forms
// (8H, 16B, 16B); the latter pastes a literal "2" onto the mnemonic. Both use
// kInput16bitsAccDestination as input_d and kInput8bits<input> for both
// source inputs.
1838 #define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ | |
1839 SIMTEST(mnemonic##_8H) { \ | |
1840 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8B, 8B, \ | |
1841 kInput16bitsAccDestination, \ | |
1842 kInput8bits##input, kInput8bits##input); \ | |
1843 } \ | |
1844 SIMTEST(mnemonic##2_8H) { \ | |
1845 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 16B, 16B, \ | |
1846 kInput16bitsAccDestination, \ | |
1847 kInput8bits##input, kInput8bits##input); \ | |
1848 } | |
1849 | |
// Defines <mnemonic>_4S with forms (4S, 4H, 4H) and <mnemonic>2_4S with forms
// (4S, 8H, 8H); the latter pastes a literal "2" onto the mnemonic. Both use
// kInput32bitsAccDestination as input_d and kInput16bits<input> for both
// source inputs.
1850 #define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ | |
1851 SIMTEST(mnemonic##_4S) { \ | |
1852 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4H, 4H, \ | |
1853 kInput32bitsAccDestination, \ | |
1854 kInput16bits##input, kInput16bits##input); \ | |
1855 } \ | |
1856 SIMTEST(mnemonic##2_4S) { \ | |
1857 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 8H, 8H, \ | |
1858 kInput32bitsAccDestination, \ | |
1859 kInput16bits##input, kInput16bits##input); \ | |
1860 } | |
1861 | |
// Defines <mnemonic>_2D with forms (2D, 2S, 2S) and <mnemonic>2_2D with forms
// (2D, 4S, 4S); the latter pastes a literal "2" onto the mnemonic. Both use
// kInput64bitsAccDestination as input_d and kInput32bits<input> for both
// source inputs.
1862 #define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \ | |
1863 SIMTEST(mnemonic##_2D) { \ | |
1864 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2S, 2S, \ | |
1865 kInput64bitsAccDestination, \ | |
1866 kInput32bits##input, kInput32bits##input); \ | |
1867 } \ | |
1868 SIMTEST(mnemonic##2_2D) { \ | |
1869 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 4S, 4S, \ | |
1870 kInput64bitsAccDestination, \ | |
1871 kInput32bits##input, kInput32bits##input); \ | |
1872 } | |
1873 | |
// Composite: expands DEFINE_TEST_NEON_3DIFF_LONG_4S followed by
// DEFINE_TEST_NEON_3DIFF_LONG_2D (no 8H variants).
1874 #define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \ | |
1875 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ | |
1876 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) | |
1877 | |
// Composite: expands the _LONG_8H, _LONG_4S and _LONG_2D macros in that order
// for the same mnemonic and input suffix.
1878 #define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \ | |
1879 DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \ | |
1880 DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \ | |
1881 DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) | |
1882 | |
// Defines the single SIMTEST <mnemonic>_S with forms (S, H, H),
// kInput32bitsAccDestination as input_d and kInput16bits<input> for both
// source inputs.
1883 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ | |
1884 SIMTEST(mnemonic##_S) { \ | |
1885 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, S, H, H, kInput32bitsAccDestination, \ | |
1886 kInput16bits##input, kInput16bits##input); \ | |
1887 } | |
1888 | |
// Defines the single SIMTEST <mnemonic>_D with forms (D, S, S),
// kInput64bitsAccDestination as input_d and kInput32bits<input> for both
// source inputs.
1889 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \ | |
1890 SIMTEST(mnemonic##_D) { \ | |
1891 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, D, S, S, kInput64bitsAccDestination, \ | |
1892 kInput32bits##input, kInput32bits##input); \ | |
1893 } | |
1894 | |
// Composite: expands DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S followed by
// DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D.
1895 #define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \ | |
1896 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \ | |
1897 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) | |
1898 | |
// Defines six SIMTESTs in which the destination and first source share a form
// while the second source uses a smaller-element form:
//   <mnemonic>_8H  (8H, 8H, 8B)    <mnemonic>2_8H (8H, 8H, 16B)
//   <mnemonic>_4S  (4S, 4S, 4H)    <mnemonic>2_4S (4S, 4S, 8H)
//   <mnemonic>_2D  (2D, 2D, 2S)    <mnemonic>2_2D (2D, 2D, 4S)
// input_d is the kInput<N>bitsAccDestination table of the destination width;
// input_n and input_m are the kInput<N>bits<input> tables matching their own
// forms. The "2" tests paste a literal "2" onto the mnemonic.
1899 #define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \ | |
1900 SIMTEST(mnemonic##_8H) { \ | |
1901 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8H, 8H, 8B, \ | |
1902 kInput16bitsAccDestination, \ | |
1903 kInput16bits##input, kInput8bits##input); \ | |
1904 } \ | |
1905 SIMTEST(mnemonic##_4S) { \ | |
1906 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4S, 4S, 4H, \ | |
1907 kInput32bitsAccDestination, \ | |
1908 kInput32bits##input, kInput16bits##input); \ | |
1909 } \ | |
1910 SIMTEST(mnemonic##_2D) { \ | |
1911 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2D, 2D, 2S, \ | |
1912 kInput64bitsAccDestination, \ | |
1913 kInput64bits##input, kInput32bits##input); \ | |
1914 } \ | |
1915 SIMTEST(mnemonic##2_8H) { \ | |
1916 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 8H, 16B, \ | |
1917 kInput16bitsAccDestination, \ | |
1918 kInput16bits##input, kInput8bits##input); \ | |
1919 } \ | |
1920 SIMTEST(mnemonic##2_4S) { \ | |
1921 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 4S, 8H, \ | |
1922 kInput32bitsAccDestination, \ | |
1923 kInput32bits##input, kInput16bits##input); \ | |
1924 } \ | |
1925 SIMTEST(mnemonic##2_2D) { \ | |
1926 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 2D, 2D, 4S, \ | |
1927 kInput64bitsAccDestination, \ | |
1928 kInput64bits##input, kInput32bits##input); \ | |
1929 } | |
1930 | |
// Defines six SIMTESTs in which both sources share a form and the destination
// uses a smaller-element form:
//   <mnemonic>_8B  (8B, 8H, 8H)    <mnemonic>2_16B (16B, 8H, 8H)
//   <mnemonic>_4H  (4H, 4S, 4S)    <mnemonic>2_8H  (8H, 4S, 4S)
//   <mnemonic>_2S  (2S, 2D, 2D)    <mnemonic>2_4S  (4S, 2D, 2D)
// input_d is the kInput<N>bitsAccDestination table of the destination width;
// both source inputs are the kInput<N>bits<input> table of the source width.
// The "2" tests paste a literal "2" onto the mnemonic.
1931 #define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \ | |
1932 SIMTEST(mnemonic##_8B) { \ | |
1933 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 8B, 8H, 8H, \ | |
1934 kInput8bitsAccDestination, \ | |
1935 kInput16bits##input, kInput16bits##input); \ | |
1936 } \ | |
1937 SIMTEST(mnemonic##_4H) { \ | |
1938 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 4H, 4S, 4S, \ | |
1939 kInput16bitsAccDestination, \ | |
1940 kInput32bits##input, kInput32bits##input); \ | |
1941 } \ | |
1942 SIMTEST(mnemonic##_2S) { \ | |
1943 CALL_TEST_NEON_HELPER_3DIFF(mnemonic, 2S, 2D, 2D, \ | |
1944 kInput32bitsAccDestination, \ | |
1945 kInput64bits##input, kInput64bits##input); \ | |
1946 } \ | |
1947 SIMTEST(mnemonic##2_16B) { \ | |
1948 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 16B, 8H, 8H, \ | |
1949 kInput8bitsAccDestination, \ | |
1950 kInput16bits##input, kInput16bits##input); \ | |
1951 } \ | |
1952 SIMTEST(mnemonic##2_8H) { \ | |
1953 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 8H, 4S, 4S, \ | |
1954 kInput16bitsAccDestination, \ | |
1955 kInput32bits##input, kInput32bits##input); \ | |
1956 } \ | |
1957 SIMTEST(mnemonic##2_4S) { \ | |
1958 CALL_TEST_NEON_HELPER_3DIFF(mnemonic##2, 4S, 2D, 2D, \ | |
1959 kInput32bitsAccDestination, \ | |
1960 kInput64bits##input, kInput64bits##input); \ | |
1961 } | |
1962 | |
// Wraps CALL_TEST_NEON_HELPER_2OpImm in a braced block, forwarding all five
// arguments unchanged. Because the expansion is a complete `{ ... }` block,
// call sites compile with or without a trailing ';'.
1963 #define CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, vdform, vnform, input_n, \ | |
1964 input_imm) \ | |
1965 { \ | |
1966 CALL_TEST_NEON_HELPER_2OpImm(mnemonic, vdform, vnform, input_n, \ | |
1967 input_imm); \ | |
1968 } | |
1969 | |
// Defines seven SIMTESTs named <mnemonic>_<form>_2OPIMM for the forms 8B, 16B,
// 4H, 8H, 2S, 4S and 2D. Each passes the same form as vdform and vnform, and
// uses the kInput<N>bits<input> and kInput<N>bitsImm<input_imm> tables with
// N = 8 (B forms), 16 (H), 32 (S) or 64 (D).
1970 #define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \ | |
1971 SIMTEST(mnemonic##_8B_2OPIMM) { \ | |
1972 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8B, kInput8bits##input, \ | |
1973 kInput8bitsImm##input_imm); \ | |
1974 } \ | |
1975 SIMTEST(mnemonic##_16B_2OPIMM) { \ | |
1976 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, 16B, kInput8bits##input, \ | |
1977 kInput8bitsImm##input_imm); \ | |
1978 } \ | |
1979 SIMTEST(mnemonic##_4H_2OPIMM) { \ | |
1980 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4H, kInput16bits##input, \ | |
1981 kInput16bitsImm##input_imm); \ | |
1982 } \ | |
1983 SIMTEST(mnemonic##_8H_2OPIMM) { \ | |
1984 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8H, kInput16bits##input, \ | |
1985 kInput16bitsImm##input_imm); \ | |
1986 } \ | |
1987 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
1988 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \ | |
1989 kInput32bitsImm##input_imm); \ | |
1990 } \ | |
1991 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
1992 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \ | |
1993 kInput32bitsImm##input_imm); \ | |
1994 } \ | |
1995 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
1996 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \ | |
1997 kInput64bitsImm##input_imm); \ | |
1998 } | |
1999 | |
// Same seven test names as DEFINE_TEST_NEON_2OPIMM (<mnemonic>_<form>_2OPIMM
// for 8B, 16B, 4H, 8H, 2S, 4S, 2D), but vnform is the scalar form of matching
// element size (B, H, S or D) rather than a copy of vdform.
2000 #define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \ | |
2001 SIMTEST(mnemonic##_8B_2OPIMM) { \ | |
2002 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, B, kInput8bits##input, \ | |
2003 kInput8bitsImm##input_imm); \ | |
2004 } \ | |
2005 SIMTEST(mnemonic##_16B_2OPIMM) { \ | |
2006 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 16B, B, kInput8bits##input, \ | |
2007 kInput8bitsImm##input_imm); \ | |
2008 } \ | |
2009 SIMTEST(mnemonic##_4H_2OPIMM) { \ | |
2010 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, H, kInput16bits##input, \ | |
2011 kInput16bitsImm##input_imm); \ | |
2012 } \ | |
2013 SIMTEST(mnemonic##_8H_2OPIMM) { \ | |
2014 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, H, kInput16bits##input, \ | |
2015 kInput16bitsImm##input_imm); \ | |
2016 } \ | |
2017 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
2018 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, S, kInput32bits##input, \ | |
2019 kInput32bitsImm##input_imm); \ | |
2020 } \ | |
2021 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
2022 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, S, kInput32bits##input, \ | |
2023 kInput32bitsImm##input_imm); \ | |
2024 } \ | |
2025 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
2026 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, D, kInput64bits##input, \ | |
2027 kInput64bitsImm##input_imm); \ | |
2028 } | |
2029 | |
// Defines six SIMTESTs named after the destination form, with (vdform, vnform):
//   <mnemonic>_8B_2OPIMM (8B, 8H)    <mnemonic>2_16B_2OPIMM (16B, 8H)
//   <mnemonic>_4H_2OPIMM (4H, 4S)    <mnemonic>2_8H_2OPIMM  (8H, 4S)
//   <mnemonic>_2S_2OPIMM (2S, 2D)    <mnemonic>2_4S_2OPIMM  (4S, 2D)
// The value table follows the source form (kInput16bits/32bits/64bits<input>),
// while the immediate table follows the destination form
// (kInput8bitsImm/16bitsImm/32bitsImm<input_imm>).
2030 #define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \ | |
2031 SIMTEST(mnemonic##_8B_2OPIMM) { \ | |
2032 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8B, 8H, kInput16bits##input, \ | |
2033 kInput8bitsImm##input_imm); \ | |
2034 } \ | |
2035 SIMTEST(mnemonic##_4H_2OPIMM) { \ | |
2036 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4H, 4S, kInput32bits##input, \ | |
2037 kInput16bitsImm##input_imm); \ | |
2038 } \ | |
2039 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
2040 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2D, kInput64bits##input, \ | |
2041 kInput32bitsImm##input_imm); \ | |
2042 } \ | |
2043 SIMTEST(mnemonic##2_16B_2OPIMM) { \ | |
2044 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 16B, 8H, kInput16bits##input, \ | |
2045 kInput8bitsImm##input_imm); \ | |
2046 } \ | |
2047 SIMTEST(mnemonic##2_8H_2OPIMM) { \ | |
2048 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 4S, kInput32bits##input, \ | |
2049 kInput16bitsImm##input_imm); \ | |
2050 } \ | |
2051 SIMTEST(mnemonic##2_4S_2OPIMM) { \ | |
2052 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 2D, kInput64bits##input, \ | |
2053 kInput32bitsImm##input_imm); \ | |
2054 } | |
2055 | |
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_NARROW. Defines
// <mnemonic>_B_2OPIMM (B, H), <mnemonic>_H_2OPIMM (H, S) and
// <mnemonic>_S_2OPIMM (S, D). The value table follows the source form and the
// immediate table follows the destination form.
2056 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \ | |
2057 SIMTEST(mnemonic##_B_2OPIMM) { \ | |
2058 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, H, kInput16bits##input, \ | |
2059 kInput8bitsImm##input_imm); \ | |
2060 } \ | |
2061 SIMTEST(mnemonic##_H_2OPIMM) { \ | |
2062 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, S, kInput32bits##input, \ | |
2063 kInput16bitsImm##input_imm); \ | |
2064 } \ | |
2065 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
2066 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, D, kInput64bits##input, \ | |
2067 kInput32bitsImm##input_imm); \ | |
2068 } | |
2069 | |
// Defines <mnemonic>_2S_2OPIMM, _4S_2OPIMM and _2D_2OPIMM with identical
// vdform/vnform. All three take their immediates from
// kInputDoubleImm<input_imm>; the 4S test reads kInputFloat<input> and the 2D
// test reads kInputDouble<input>.
// NOTE(review): the 2S test pastes kInputFloat##Basic, so it always uses
// kInputFloatBasic and ignores `input`, unlike the 4S/2D tests. This looks
// unintentional, but the recorded expected results may depend on it - confirm
// before changing.
// The 2S helper call has no trailing ';'. That is fine because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
2070 #define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \ | |
2071 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
2072 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \ | |
2073 kInputDoubleImm##input_imm) \ | |
2074 } \ | |
2075 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
2076 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \ | |
2077 kInputDoubleImm##input_imm); \ | |
2078 } \ | |
2079 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
2080 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \ | |
2081 kInputDoubleImm##input_imm); \ | |
2082 } | |
2083 | |
// Defines <mnemonic>_2S_2OPIMM, _4S_2OPIMM and _2D_2OPIMM with identical
// vdform/vnform. Values come from floating-point tables, while immediates come
// from the integer tables kInput32bitsImm<input_imm> (2S, 4S) and
// kInput64bitsImm<input_imm> (2D).
// NOTE(review): the 2S test pastes kInputFloat##Basic, so it always uses
// kInputFloatBasic and ignores `input`, unlike the 4S/2D tests. This looks
// unintentional, but the recorded expected results may depend on it - confirm
// before changing.
// The helper calls have no trailing ';'. That is fine because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
2084 #define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \ | |
2085 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
2086 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInputFloat##Basic, \ | |
2087 kInput32bitsImm##input_imm) \ | |
2088 } \ | |
2089 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
2090 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInputFloat##input, \ | |
2091 kInput32bitsImm##input_imm) \ | |
2092 } \ | |
2093 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
2094 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInputDouble##input, \ | |
2095 kInput64bitsImm##input_imm) \ | |
2096 } | |
2097 | |
// Scalar counterpart of DEFINE_TEST_NEON_2OPIMM_FP. Defines
// <mnemonic>_S_2OPIMM (S, S) with immediates from kInput32bitsImm<input_imm>
// and <mnemonic>_D_2OPIMM (D, D) with kInputDouble<input> values and
// kInput64bitsImm<input_imm> immediates.
// NOTE(review): the S test pastes kInputFloat##Basic, so it always uses
// kInputFloatBasic and ignores `input`, unlike the D test. This looks
// unintentional, but the recorded expected results may depend on it - confirm
// before changing.
// The helper calls have no trailing ';'. That is fine because
// CALL_TEST_NEON_HELPER_2OPIMM expands to a braced block.
2098 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \ | |
2099 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
2100 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##Basic, \ | |
2101 kInput32bitsImm##input_imm) \ | |
2102 } \ | |
2103 SIMTEST(mnemonic##_D_2OPIMM) { \ | |
2104 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \ | |
2105 kInput64bitsImm##input_imm) \ | |
2106 } | |
2107 | |
// Subset of DEFINE_TEST_NEON_2OPIMM restricted to the 2S, 4S and 2D forms.
// Uses the integer tables kInput32bits/64bits<input> and
// kInput32bitsImm/64bitsImm<input_imm>.
2108 #define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \ | |
2109 SIMTEST(mnemonic##_2S_2OPIMM) { \ | |
2110 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2S, 2S, kInput32bits##input, \ | |
2111 kInput32bitsImm##input_imm); \ | |
2112 } \ | |
2113 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
2114 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4S, kInput32bits##input, \ | |
2115 kInput32bitsImm##input_imm); \ | |
2116 } \ | |
2117 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
2118 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2D, kInput64bits##input, \ | |
2119 kInput64bitsImm##input_imm); \ | |
2120 } | |
2121 | |
// Defines the single SIMTEST <mnemonic>_D_2OPIMM with forms (D, D), values
// from kInput64bits<input> and immediates from kInput64bitsImm<input_imm>.
2122 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \ | |
2123 SIMTEST(mnemonic##_D_2OPIMM) { \ | |
2124 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInput64bits##input, \ | |
2125 kInput64bitsImm##input_imm); \ | |
2126 } | |
2127 | |
// Defines <mnemonic>_S_2OPIMM with forms (S, S) and the 32-bit value/immediate
// tables, then expands DEFINE_TEST_NEON_2OPIMM_SCALAR_D for the D variant.
2128 #define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \ | |
2129 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
2130 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInput32bits##input, \ | |
2131 kInput32bitsImm##input_imm); \ | |
2132 } \ | |
2133 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) | |
2134 | |
// Defines the single SIMTEST <mnemonic>_D_2OPIMM with forms (D, D), values
// from kInputDouble<input> and immediates from kInputDoubleImm<input_imm>.
2135 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \ | |
2136 SIMTEST(mnemonic##_D_2OPIMM) { \ | |
2137 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, D, D, kInputDouble##input, \ | |
2138 kInputDoubleImm##input_imm); \ | |
2139 } | |
2140 | |
// Defines <mnemonic>_S_2OPIMM with forms (S, S), values from
// kInputFloat<input> and immediates from kInputDoubleImm<input_imm>, then
// expands DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D for the D variant.
2141 #define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \ | |
2142 SIMTEST(mnemonic##_S_2OPIMM) { \ | |
2143 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, S, S, kInputFloat##input, \ | |
2144 kInputDoubleImm##input_imm); \ | |
2145 } \ | |
2146 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) | |
2147 | |
// Defines <mnemonic>_B_2OPIMM (B, B) and <mnemonic>_H_2OPIMM (H, H) with the
// 8-bit and 16-bit value/immediate tables, then expands
// DEFINE_TEST_NEON_2OPIMM_SCALAR_SD for the S and D variants.
2148 #define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \ | |
2149 SIMTEST(mnemonic##_B_2OPIMM) { \ | |
2150 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, B, B, kInput8bits##input, \ | |
2151 kInput8bitsImm##input_imm); \ | |
2152 } \ | |
2153 SIMTEST(mnemonic##_H_2OPIMM) { \ | |
2154 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, H, H, kInput16bits##input, \ | |
2155 kInput16bitsImm##input_imm); \ | |
2156 } \ | |
2157 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) | |
2158 | |
// Defines six SIMTESTs named after the destination form, with (vdform, vnform):
//   <mnemonic>_8H_2OPIMM (8H, 8B)    <mnemonic>2_8H_2OPIMM (8H, 16B)
//   <mnemonic>_4S_2OPIMM (4S, 4H)    <mnemonic>2_4S_2OPIMM (4S, 8H)
//   <mnemonic>_2D_2OPIMM (2D, 2S)    <mnemonic>2_2D_2OPIMM (2D, 4S)
// Both the value table and the immediate table follow the source form
// (8, 16 or 32 bits). The "2" tests paste a literal "2" onto the mnemonic.
2159 #define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \ | |
2160 SIMTEST(mnemonic##_8H_2OPIMM) { \ | |
2161 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 8H, 8B, kInput8bits##input, \ | |
2162 kInput8bitsImm##input_imm); \ | |
2163 } \ | |
2164 SIMTEST(mnemonic##_4S_2OPIMM) { \ | |
2165 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 4S, 4H, kInput16bits##input, \ | |
2166 kInput16bitsImm##input_imm); \ | |
2167 } \ | |
2168 SIMTEST(mnemonic##_2D_2OPIMM) { \ | |
2169 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic, 2D, 2S, kInput32bits##input, \ | |
2170 kInput32bitsImm##input_imm); \ | |
2171 } \ | |
2172 SIMTEST(mnemonic##2_8H_2OPIMM) { \ | |
2173 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 8H, 16B, kInput8bits##input, \ | |
2174 kInput8bitsImm##input_imm); \ | |
2175 } \ | |
2176 SIMTEST(mnemonic##2_4S_2OPIMM) { \ | |
2177 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 4S, 8H, kInput16bits##input, \ | |
2178 kInput16bitsImm##input_imm); \ | |
2179 } \ | |
2180 SIMTEST(mnemonic##2_2D_2OPIMM) { \ | |
2181 CALL_TEST_NEON_HELPER_2OPIMM(mnemonic##2, 2D, 4S, kInput32bits##input, \ | |
2182 kInput32bitsImm##input_imm); \ | |
2183 } | |
2184 | |
// Wraps CALL_TEST_NEON_HELPER_ByElement in a braced block, forwarding the
// mnemonic, three forms, three input tables and the index table unchanged.
2185 #define CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, vdform, vnform, vmform, \ | |
2186 input_d, input_n, input_m, indices) \ | |
2187 { \ | |
2188 CALL_TEST_NEON_HELPER_ByElement(mnemonic, vdform, vnform, vmform, input_d, \ | |
2189 input_n, input_m, indices); \ | |
2190 } | |
2191 | |
// Defines four SIMTESTs named <mnemonic>_<vd>_<vn>_<vm>: 4H_4H_H, 8H_8H_H,
// 2S_2S_S and 4S_4S_S. input_d, input_n and input_m are separate table-name
// suffixes, each pasted onto kInput16bits (H tests) or kInput32bits (S tests).
// The index table is kInputHIndices for H tests and kInputSIndices for S tests.
2192 #define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \ | |
2193 SIMTEST(mnemonic##_4H_4H_H) { \ | |
2194 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2195 mnemonic, 4H, 4H, H, kInput16bits##input_d, kInput16bits##input_n, \ | |
2196 kInput16bits##input_m, kInputHIndices); \ | |
2197 } \ | |
2198 SIMTEST(mnemonic##_8H_8H_H) { \ | |
2199 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2200 mnemonic, 8H, 8H, H, kInput16bits##input_d, kInput16bits##input_n, \ | |
2201 kInput16bits##input_m, kInputHIndices); \ | |
2202 } \ | |
2203 SIMTEST(mnemonic##_2S_2S_S) { \ | |
2204 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2205 mnemonic, 2S, 2S, S, kInput32bits##input_d, kInput32bits##input_n, \ | |
2206 kInput32bits##input_m, kInputSIndices); \ | |
2207 } \ | |
2208 SIMTEST(mnemonic##_4S_4S_S) { \ | |
2209 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2210 mnemonic, 4S, 4S, S, kInput32bits##input_d, kInput32bits##input_n, \ | |
2211 kInput32bits##input_m, kInputSIndices); \ | |
2212 } | |
2213 | |
// Scalar by-element tests. Defines <mnemonic>_H_H_H (16-bit tables,
// kInputHIndices) and <mnemonic>_S_S_S (32-bit tables, kInputSIndices).
2214 #define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \ | |
2215 SIMTEST(mnemonic##_H_H_H) { \ | |
2216 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, H, H, H, kInput16bits##input_d, \ | |
2217 kInput16bits##input_n, \ | |
2218 kInput16bits##input_m, kInputHIndices); \ | |
2219 } \ | |
2220 SIMTEST(mnemonic##_S_S_S) { \ | |
2221 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInput32bits##input_d, \ | |
2222 kInput32bits##input_n, \ | |
2223 kInput32bits##input_m, kInputSIndices); \ | |
2224 } | |
2225 | |
// Floating-point by-element tests. Defines <mnemonic>_2S_2S_S and
// <mnemonic>_4S_4S_S using kInputFloat<...> tables with kInputSIndices, and
// <mnemonic>_2D_2D_D using kInputDouble<...> tables with kInputDIndices.
2226 #define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \ | |
2227 SIMTEST(mnemonic##_2S_2S_S) { \ | |
2228 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, kInputFloat##input_d, \ | |
2229 kInputFloat##input_n, \ | |
2230 kInputFloat##input_m, kInputSIndices); \ | |
2231 } \ | |
2232 SIMTEST(mnemonic##_4S_4S_S) { \ | |
2233 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, kInputFloat##input_d, \ | |
2234 kInputFloat##input_n, \ | |
2235 kInputFloat##input_m, kInputSIndices); \ | |
2236 } \ | |
2237 SIMTEST(mnemonic##_2D_2D_D) { \ | |
2238 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2239 mnemonic, 2D, 2D, D, kInputDouble##input_d, kInputDouble##input_n, \ | |
2240 kInputDouble##input_m, kInputDIndices); \ | |
2241 } | |
2242 | |
// Scalar floating-point by-element tests. Defines <mnemonic>_S_S_S using
// kInputFloat<...> tables with kInputSIndices and <mnemonic>_D_D_D using
// kInputDouble<...> tables with kInputDIndices.
2243 #define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \ | |
2244 SIMTEST(mnemonic##_S_S_S) { \ | |
2245 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, S, S, kInputFloat##inp_d, \ | |
2246 kInputFloat##inp_n, kInputFloat##inp_m, \ | |
2247 kInputSIndices); \ | |
2248 } \ | |
2249 SIMTEST(mnemonic##_D_D_D) { \ | |
2250 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, D, D, kInputDouble##inp_d, \ | |
2251 kInputDouble##inp_n, kInputDouble##inp_m, \ | |
2252 kInputDIndices); \ | |
2253 } | |
2254 | |
// By-element tests whose destination form differs from the source forms:
//   <mnemonic>_4S_4H_H  (4S, 4H, H)    <mnemonic>2_4S_8H_H (4S, 8H, H)
//   <mnemonic>_2D_2S_S  (2D, 2S, S)    <mnemonic>2_2D_4S_S (2D, 4S, S)
// input_d is pasted onto the destination-width table (kInput32bits or
// kInput64bits); input_n and input_m onto the source-width table. The index
// table is kInputHIndices or kInputSIndices to match vmform. The "2" tests
// paste a literal "2" onto the mnemonic.
2255 #define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \ | |
2256 SIMTEST(mnemonic##_4S_4H_H) { \ | |
2257 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2258 mnemonic, 4S, 4H, H, kInput32bits##input_d, kInput16bits##input_n, \ | |
2259 kInput16bits##input_m, kInputHIndices); \ | |
2260 } \ | |
2261 SIMTEST(mnemonic##2_4S_8H_H) { \ | |
2262 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2263 mnemonic##2, 4S, 8H, H, kInput32bits##input_d, kInput16bits##input_n, \ | |
2264 kInput16bits##input_m, kInputHIndices); \ | |
2265 } \ | |
2266 SIMTEST(mnemonic##_2D_2S_S) { \ | |
2267 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2268 mnemonic, 2D, 2S, S, kInput64bits##input_d, kInput32bits##input_n, \ | |
2269 kInput32bits##input_m, kInputSIndices); \ | |
2270 } \ | |
2271 SIMTEST(mnemonic##2_2D_4S_S) { \ | |
2272 CALL_TEST_NEON_HELPER_BYELEMENT( \ | |
2273 mnemonic##2, 2D, 4S, S, kInput64bits##input_d, kInput32bits##input_n, \ | |
2274 kInput32bits##input_m, kInputSIndices); \ | |
2275 } | |
2276 | |
// Scalar counterpart of DEFINE_TEST_NEON_BYELEMENT_DIFF. Defines
// <mnemonic>_S_H_H (forms S, H, H; kInputHIndices) and <mnemonic>_D_S_S
// (forms D, S, S; kInputSIndices). input_d uses the destination-width table
// and input_n/input_m the source-width table.
2277 #define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(mnemonic, input_d, input_n, \ | |
2278 input_m) \ | |
2279 SIMTEST(mnemonic##_S_H_H) { \ | |
2280 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, S, H, H, kInput32bits##input_d, \ | |
2281 kInput16bits##input_n, \ | |
2282 kInput16bits##input_m, kInputHIndices); \ | |
2283 } \ | |
2284 SIMTEST(mnemonic##_D_S_S) { \ | |
2285 CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, D, S, S, kInput64bits##input_d, \ | |
2286 kInput32bits##input_n, \ | |
2287 kInput32bits##input_m, kInputSIndices); \ | |
2288 } | |
2289 | |
// Wraps CALL_TEST_NEON_HELPER_OpImmOpImm in a braced block. Passes the
// member-function pointer &MacroAssembler::<mnemonic> followed by the bare
// mnemonic token, and uses `variant` for both form arguments; the two inputs
// and two immediate tables are forwarded in the order (input_d, input_imm1,
// input_n, input_imm2).
2290 #define CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, variant, input_d, input_imm1, \ | |
2291 input_n, input_imm2) \ | |
2292 { \ | |
2293 CALL_TEST_NEON_HELPER_OpImmOpImm(&MacroAssembler::mnemonic, mnemonic, \ | |
2294 variant, variant, input_d, input_imm1, \ | |
2295 input_n, input_imm2); \ | |
2296 } | |
2297 | |
// Defines four SIMTESTs, mnemonic##_B, _H, _S and _D, which call
// CALL_TEST_NEON_HELPER_2OP2IMM with the variants 16B, 8H, 4S and 2D
// respectively.
//   input_d, input_n        suffixes pasted onto kInput{8,16,32,64}bits.
//   input_imm1, input_imm2  suffixes pasted onto kInput{8,16,32,64}bitsImm.
// The bit width in each pasted name matches the element size of the test.
2298 #define DEFINE_TEST_NEON_2OP2IMM(mnemonic, input_d, input_imm1, input_n, \ | |
2299 input_imm2) \ | |
2300 SIMTEST(mnemonic##_B) { \ | |
2301 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2302 mnemonic, 16B, kInput8bits##input_d, kInput8bitsImm##input_imm1, \ | |
2303 kInput8bits##input_n, kInput8bitsImm##input_imm2); \ | |
2304 } \ | |
2305 SIMTEST(mnemonic##_H) { \ | |
2306 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2307 mnemonic, 8H, kInput16bits##input_d, kInput16bitsImm##input_imm1, \ | |
2308 kInput16bits##input_n, kInput16bitsImm##input_imm2); \ | |
2309 } \ | |
2310 SIMTEST(mnemonic##_S) { \ | |
2311 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2312 mnemonic, 4S, kInput32bits##input_d, kInput32bitsImm##input_imm1, \ | |
2313 kInput32bits##input_n, kInput32bitsImm##input_imm2); \ | |
2314 } \ | |
2315 SIMTEST(mnemonic##_D) { \ | |
2316 CALL_TEST_NEON_HELPER_2OP2IMM( \ | |
2317 mnemonic, 2D, kInput64bits##input_d, kInput64bitsImm##input_imm1, \ | |
2318 kInput64bits##input_n, kInput64bitsImm##input_imm2); \ | |
2319 } | |
2320 | |
2321 // Advanced SIMD copy. | |
// ins goes through DEFINE_TEST_NEON_2OP2IMM, so LaneCountFromZero is pasted
// onto the kInput<N>bitsImm prefix for both immediate inputs.
2322 DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic, | |
2323 LaneCountFromZero) | |
2324 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero) | |
2325 | |
2326 // Advanced SIMD scalar copy. | |
// Scalar dup is given the same two arguments (Basic, LaneCountFromZero) as
// the vector dup entry above.
2327 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero) | |
2328 | |
2329 // Advanced SIMD three same. | |
// One test-generating macro invocation per mnemonic, all using the Basic
// input suffix. The macro name varies per line (plain, _NO2D, _HS, _FP,
// _8B_16B).
// NOTE(review): the suffix presumably restricts which vector arrangements are
// instantiated; confirm against the macro definitions, which are not in this
// section.
2330 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic) | |
2331 DEFINE_TEST_NEON_3SAME(sqadd, Basic) | |
2332 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic) | |
2333 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic) | |
2334 DEFINE_TEST_NEON_3SAME(sqsub, Basic) | |
2335 DEFINE_TEST_NEON_3SAME(cmgt, Basic) | |
2336 DEFINE_TEST_NEON_3SAME(cmge, Basic) | |
2337 DEFINE_TEST_NEON_3SAME(sshl, Basic) | |
2338 DEFINE_TEST_NEON_3SAME(sqshl, Basic) | |
2339 DEFINE_TEST_NEON_3SAME(srshl, Basic) | |
2340 DEFINE_TEST_NEON_3SAME(sqrshl, Basic) | |
2341 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic) | |
2342 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic) | |
2343 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic) | |
2344 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic) | |
2345 DEFINE_TEST_NEON_3SAME(add, Basic) | |
2346 DEFINE_TEST_NEON_3SAME(cmtst, Basic) | |
2347 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic) | |
2348 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic) | |
2349 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic) | |
2350 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic) | |
2351 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic) | |
2352 DEFINE_TEST_NEON_3SAME(addp, Basic) | |
2353 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic) | |
2354 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic) | |
2355 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic) | |
2356 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic) | |
2357 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic) | |
2358 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic) | |
2359 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic) | |
2360 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic) | |
2361 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic) | |
2362 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic) | |
2363 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic) | |
2364 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic) | |
2365 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic) | |
2366 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic) | |
2367 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic) | |
2368 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic) | |
2369 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic) | |
2370 DEFINE_TEST_NEON_3SAME(uqadd, Basic) | |
2371 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic) | |
2372 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic) | |
2373 DEFINE_TEST_NEON_3SAME(uqsub, Basic) | |
2374 DEFINE_TEST_NEON_3SAME(cmhi, Basic) | |
2375 DEFINE_TEST_NEON_3SAME(cmhs, Basic) | |
2376 DEFINE_TEST_NEON_3SAME(ushl, Basic) | |
2377 DEFINE_TEST_NEON_3SAME(uqshl, Basic) | |
2378 DEFINE_TEST_NEON_3SAME(urshl, Basic) | |
2379 DEFINE_TEST_NEON_3SAME(uqrshl, Basic) | |
2380 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic) | |
2381 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic) | |
2382 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic) | |
2383 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic) | |
2384 DEFINE_TEST_NEON_3SAME(sub, Basic) | |
2385 DEFINE_TEST_NEON_3SAME(cmeq, Basic) | |
2386 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic) | |
2387 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic) | |
2388 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic) | |
2389 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic) | |
2390 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic) | |
2391 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic) | |
2392 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic) | |
2393 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic) | |
2394 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic) | |
2395 DEFINE_TEST_NEON_3SAME_FP(facge, Basic) | |
2396 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic) | |
2397 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic) | |
2398 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic) | |
2399 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic) | |
2400 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic) | |
2401 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic) | |
2402 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic) | |
2403 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic) | |
2404 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic) | |
2405 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic) | |
2406 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic) | |
2407 | |
2408 // Advanced SIMD scalar three same. | |
// One invocation per mnemonic, all with the Basic input suffix. Four macro
// flavours are used here: _SCALAR, _SCALAR_D, _SCALAR_HS and _FP_SCALAR.
// NOTE(review): the flavour presumably selects the scalar register sizes that
// get a test; confirm against the macro definitions.
2409 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic) | |
2410 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic) | |
2411 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic) | |
2412 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic) | |
2413 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic) | |
2414 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic) | |
2415 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic) | |
2416 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic) | |
2417 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic) | |
2418 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic) | |
2419 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic) | |
2420 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic) | |
2421 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic) | |
2422 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic) | |
2423 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic) | |
2424 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic) | |
2425 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic) | |
2426 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic) | |
2427 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic) | |
2428 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic) | |
2429 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic) | |
2430 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic) | |
2431 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic) | |
2432 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic) | |
2433 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic) | |
2434 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic) | |
2435 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic) | |
2436 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic) | |
2437 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic) | |
2438 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic) | |
2439 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic) | |
2440 | |
2441 // Advanced SIMD three different. | |
// One invocation per mnemonic, all with the Basic input suffix. The macro
// flavours used are _LONG, _WIDE, _NARROW, _LONG_SD and _LONG_8H; their
// definitions are outside this section.
2442 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic) | |
2443 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic) | |
2444 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic) | |
2445 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic) | |
2446 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic) | |
2447 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic) | |
2448 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic) | |
2449 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic) | |
2450 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic) | |
2451 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic) | |
2452 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic) | |
2453 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic) | |
2454 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic) | |
2455 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic) | |
2456 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic) | |
2457 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic) | |
2458 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic) | |
2459 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic) | |
2460 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic) | |
2461 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic) | |
2462 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic) | |
2463 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic) | |
2464 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic) | |
2465 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic) | |
2466 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic) | |
2467 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic) | |
2468 | |
2469 // Advanced SIMD scalar three different. | |
// Only the three sqdm* mnemonics are listed in this group, each through the
// _SCALAR_LONG_SD macro with the Basic input suffix.
2470 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic) | |
2471 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic) | |
2472 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic) | |
2473 | |
2474 // Advanced SIMD scalar pairwise. | |
// addp is written out by hand instead of through a DEFINE_TEST_NEON_* macro.
// It calls CALL_TEST_NEON_HELPER_2DIFF directly with formats D and 2D and the
// kInput64bitsBasic inputs.
2475 SIMTEST(addp_SCALAR) { | |
2476 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic); | |
2477 } | |
// The floating-point pairwise mnemonics all share the _FP_SCALAR_SD macro.
2478 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic) | |
2479 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic) | |
2480 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic) | |
2481 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic) | |
2482 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic) | |
2483 | |
2484 // Advanced SIMD shift by immediate. | |
// Each invocation takes (mnemonic, input suffix, immediate suffix). Three
// immediate suffixes appear here: TypeWidth, TypeWidthFromZero and
// TypeWidthFromZeroToWidth.
// NOTE(review): judging by the names these select different immediate ranges
// (the left-shift mnemonics use the FromZero variant); confirm against the
// kInput*Imm* tables.
2485 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth) | |
2486 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth) | |
2487 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth) | |
2488 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth) | |
2489 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero) | |
2490 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero) | |
2491 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth) | |
2492 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth) | |
2493 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth) | |
2494 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth) | |
2495 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero) | |
2496 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions, | |
2497 TypeWidthFromZeroToWidth) | |
2498 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth) | |
2499 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth) | |
2500 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth) | |
2501 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth) | |
2502 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth) | |
2503 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth) | |
2504 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero) | |
2505 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero) | |
2506 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero) | |
2507 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth) | |
2508 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth) | |
2509 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth) | |
2510 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth) | |
2511 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero) | |
2512 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions, | |
2513 TypeWidthFromZeroToWidth) | |
2514 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth) | |
2515 | |
2516 // Advanced SIMD scalar shift by immediate. | |
// Scalar forms of the shift-by-immediate tests. Each invocation takes
// (mnemonic, input suffix, immediate suffix); the immediate suffixes used are
// TypeWidth, TypeWidthFromZero and TypeWidthFromZeroToWidth.
2517 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth) | |
2518 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth) | |
2519 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth) | |
2520 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth) | |
2521 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero) | |
2522 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero) | |
2523 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth) | |
2524 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth) | |
2525 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions, | |
2526 TypeWidthFromZeroToWidth) | |
2527 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth) | |
2528 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth) | |
2529 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth) | |
2530 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth) | |
2531 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth) | |
2532 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth) | |
2533 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero) | |
2534 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero) | |
2535 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero) | |
2536 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth) | |
2537 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth) | |
2538 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth) | |
2539 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth) | |
2540 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions, | |
2541 TypeWidthFromZeroToWidth) | |
2542 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth) | |
2543 | |
2544 // Advanced SIMD two-register miscellaneous. | |
// One invocation per mnemonic. The compare-against-zero mnemonics go through
// the 2OPIMM / 2OPIMM_FCMP_ZERO macros with the Zero immediate suffix. The
// frint* and fcvt* entries use the Conversions input suffix instead of Basic.
// The interleaved comments record instruction forms that have no entry here
// because another test already covers them.
2545 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic) | |
2546 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic) | |
2547 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic) | |
2548 DEFINE_TEST_NEON_2SAME(suqadd, Basic) | |
2549 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic) | |
2550 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic) | |
2551 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic) | |
2552 DEFINE_TEST_NEON_2SAME(sqabs, Basic) | |
2553 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero) | |
2554 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero) | |
2555 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero) | |
2556 DEFINE_TEST_NEON_2SAME(abs, Basic) | |
2557 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic) | |
2558 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic) | |
2559 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions) | |
2560 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions) | |
2561 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions) | |
2562 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions) | |
2563 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions) | |
2564 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions) | |
2565 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions) | |
2566 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. | |
2567 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero) | |
2568 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero) | |
2569 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero) | |
2570 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic) | |
2571 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions) | |
2572 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions) | |
2573 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions) | |
2574 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. | |
2575 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic) | |
2576 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic) | |
2577 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic) | |
2578 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic) | |
2579 DEFINE_TEST_NEON_2SAME(usqadd, Basic) | |
2580 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic) | |
2581 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic) | |
2582 DEFINE_TEST_NEON_2SAME(sqneg, Basic) | |
2583 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero) | |
2584 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero) | |
2585 DEFINE_TEST_NEON_2SAME(neg, Basic) | |
2586 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic) | |
2587 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL) | |
2588 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic) | |
2589 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions) | |
2590 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions) | |
2591 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions) | |
2592 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions) | |
2593 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions) | |
2594 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions) | |
2595 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. | |
2596 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic) | |
2597 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic) | |
2598 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero) | |
2599 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero) | |
2600 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic) | |
2601 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions) | |
2602 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions) | |
2603 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. | |
2604 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic) | |
2605 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic) | |
2606 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic) | |
2607 | |
2608 // Advanced SIMD scalar two-register miscellaneous. | |
// One invocation per mnemonic. The compare-against-zero mnemonics use the
// Zero immediate suffix, and the fcvt* entries use the Conversions input
// suffix. The interleaved comments record instruction forms that have no
// entry here because another test already covers them.
2609 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic) | |
2610 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic) | |
2611 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero) | |
2612 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero) | |
2613 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero) | |
2614 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic) | |
2615 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic) | |
2616 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions) | |
2617 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions) | |
2618 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions) | |
2619 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0. | |
2620 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero) | |
2621 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero) | |
2622 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero) | |
2623 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions) | |
2624 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0. | |
2625 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic) | |
2626 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic) | |
2627 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic) | |
2628 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic) | |
2629 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero) | |
2630 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero) | |
2631 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic) | |
2632 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic) | |
2633 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic) | |
// fcvtxn is written out by hand instead of through a DEFINE_TEST_NEON_* macro.
// It calls CALL_TEST_NEON_HELPER_2DIFF directly with formats S and D and the
// kInputDoubleConversions inputs.
2634 SIMTEST(fcvtxn_SCALAR) { | |
2635 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions); | |
2636 } | |
2637 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions) | |
2638 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions) | |
2639 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions) | |
2640 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0. | |
2641 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero) | |
2642 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero) | |
2643 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions) | |
2644 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0. | |
2645 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic) | |
2646 | |
2647 // Advanced SIMD across lanes. | |
// One invocation per mnemonic with the Basic input suffix. saddlv/uaddlv use
// the _ACROSS_LONG macro, the f* mnemonics use _ACROSS_FP, and the rest use
// plain _ACROSS.
2648 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic) | |
2649 DEFINE_TEST_NEON_ACROSS(smaxv, Basic) | |
2650 DEFINE_TEST_NEON_ACROSS(sminv, Basic) | |
2651 DEFINE_TEST_NEON_ACROSS(addv, Basic) | |
2652 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic) | |
2653 DEFINE_TEST_NEON_ACROSS(umaxv, Basic) | |
2654 DEFINE_TEST_NEON_ACROSS(uminv, Basic) | |
2655 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic) | |
2656 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic) | |
2657 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic) | |
2658 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic) | |
2659 | |
2660 // Advanced SIMD permute. | |
// The permute mnemonics reuse the same DEFINE_TEST_NEON_3SAME macro as the
// "three same" group, with the Basic input suffix.
2661 DEFINE_TEST_NEON_3SAME(uzp1, Basic) | |
2662 DEFINE_TEST_NEON_3SAME(trn1, Basic) | |
2663 DEFINE_TEST_NEON_3SAME(zip1, Basic) | |
2664 DEFINE_TEST_NEON_3SAME(uzp2, Basic) | |
2665 DEFINE_TEST_NEON_3SAME(trn2, Basic) | |
2666 DEFINE_TEST_NEON_3SAME(zip2, Basic) | |
2667 | |
2668 // Advanced SIMD vector x indexed element. | |
// Each invocation takes (mnemonic, input_d, input_n, input_m) and every entry
// uses Basic for all three input suffixes. Entries using the _BYELEMENT_DIFF
// macro each produce four tests (the 4S_4H_H and 2D_2S_S forms, plus the
// "2"-suffixed 4S_8H_H and 2D_4S_S forms).
2669 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic) | |
2670 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic) | |
2671 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic) | |
2672 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic) | |
2673 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic) | |
2674 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic) | |
2675 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic) | |
2676 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic) | |
2677 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic) | |
2678 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic) | |
2679 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic) | |
2680 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic) | |
2681 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic) | |
2682 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic) | |
2683 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic) | |
2684 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic) | |
2685 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic) | |
2686 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic) | |
2687 | |
2688 // Advanced SIMD scalar x indexed element. | |
// Each invocation takes (mnemonic, input_d, input_n, input_m) and every entry
// uses Basic for all three input suffixes. Entries using the
// _BYELEMENT_DIFF_SCALAR macro each produce two tests (S_H_H and D_S_S).
2689 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic) | |
2690 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic) | |
2691 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic) | |
2692 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic) | |
2693 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic) | |
2694 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic) | |
2695 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic) | |
2696 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic) | |
2697 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic) | |
OLD | NEW |