test/cctest/test-simulator-neon-arm64.cc - Issue 2785183005: Revert "ARM64: Add NEON support"

Side by Side Diff: test/cctest/test-simulator-neon-arm64.cc

Issue 2785183005: Revert "ARM64: Add NEON support" (Closed)

Patch Set: Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include <stdio.h>

6 #include <stdlib.h>

7 #include <string.h>

8 #include <cmath>

9 #include <limits>

10

11 #include "src/arm64/decoder-arm64-inl.h"

12 #include "src/arm64/disasm-arm64.h"

13 #include "src/arm64/simulator-arm64.h"

14 #include "src/arm64/utils-arm64.h"

15 #include "src/base/platform/platform.h"

16 #include "src/base/utils/random-number-generator.h"

17 #include "src/macro-assembler-inl.h"

18 #include "test/cctest/cctest.h"

19 #include "test/cctest/test-simulator-neon-inputs-arm64.h"

20 #include "test/cctest/test-simulator-neon-traces-arm64.h"

21 #include "test/cctest/test-utils-arm64.h"

22

23 using namespace v8::internal;

24

25 // Test infrastructure.

26 //

27 // Tests are functions which accept no parameters and have no return values.

28 // The testing code should not perform an explicit return once completed. For

29 // example to test the mov immediate instruction a very simple test would be:

30 //

31 // SIMTEST(mov_x0_one) {

32 // SETUP();

33 //

34 // START();

35 // __ mov(x0, Operand(1));

36 // END();

37 //

38 // RUN();

39 //

40 // CHECK_EQUAL_64(1, x0);

41 //

42 // TEARDOWN();

43 // }

44 //

45 // Within a START ... END block all registers but sp can be modified. sp has to

46 // be explicitly saved/restored. The END() macro replaces the function return

47 // so it may appear multiple times in a test if the test has multiple exit

48 // points.

49 //

50 // Once the test has been run all integer and floating point registers as well

51 // as flags are accessible through a RegisterDump instance, see

52 // test-utils-arm64.h for more info on RegisterDump.

53 //

54 // We provide some helper asserts to handle common cases:

55 //

56 // CHECK_EQUAL_32(int32_t, int32_t)

57 // CHECK_EQUAL_FP32(float, float)

58 // CHECK_EQUAL_32(int32_t, W register)

59 // CHECK_EQUAL_FP32(float, S register)

60 // CHECK_EQUAL_64(int64_t, int64_t)

61 // CHECK_EQUAL_FP64(double, double)

62 // CHECK_EQUAL_64(int64_t, X register)

63 // CHECK_EQUAL_64(X register, X register)

64 // CHECK_EQUAL_FP64(double, D register)

65 //

66 // e.g. CHECK_EQUAL_FP64(0.5, d30);

67 //

68 // If more advanced computation is required before the assert then access the

69 // RegisterDump named core directly:

70 //

71 // CHECK_EQUAL_64(0x1234, core.xreg(0) & 0xffff);

72

#if 0  // TODO(all): enable.
// Context used by the tests; InitializeVM() creates it on first use.
static v8::Persistent<v8::Context> env;

// Creates the context if it does not exist yet; otherwise does nothing.
static void InitializeVM() {
  if (!env.IsEmpty()) return;
  env = v8::Context::New();
}
#endif

82

// Assembler shorthand: "__ Foo(...)" expands to "masm.Foo(...)".
83 #define __ masm.

// SIMTEST(name) expands to TEST(SIM_<name>).
84 #define SIMTEST(name) TEST(SIM_##name)

85

// Buffer size that SETUP() passes to SETUP_SIZE().
86 #define BUF_SIZE 8192

87 #define SETUP() SETUP_SIZE(BUF_SIZE)

88

// Expands to a call to CcTest::InitializeVM(). Note that the expansion already
// ends with a semicolon.
89 #define INIT_V8() CcTest::InitializeVM();

90

#ifdef USE_SIMULATOR

// Run tests with the simulator.
//
// SETUP_SIZE declares, in the enclosing scope:
//   isolate, scope   - the cctest isolate and a HandleScope on it,
//   buf              - a code buffer of 'buf_size' bytes from new[],
//   masm             - a MacroAssembler emitting into 'buf',
//   decoder,
//   simulator        - a Simulator constructed from a newly allocated decoder,
//   core             - the RegisterDump filled in by END().
// The isolate is checked before anything is constructed from it.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != nullptr);                                   \
  HandleScope scope(isolate);                                  \
  byte* buf = new byte[buf_size];                              \
  MacroAssembler masm(isolate, buf, buf_size,                  \
                      v8::internal::CodeObjectRequired::kYes); \
  Decoder<DispatchingDecoderVisitor>* decoder =                \
      new Decoder<DispatchingDecoderVisitor>();                \
  Simulator simulator(decoder);                                \
  RegisterDump core;

// Reset the assembler and simulator, so that instructions can be generated,
// but don't actually emit any code. This can be used by tests that need to
// emit instructions at the start of the buffer. Note that START_AFTER_RESET
// must be called before any callee-saved register is modified, and before an
// END is encountered.
//
// Most tests should call START, rather than call RESET directly.
#define RESET() \
  __ Reset();   \
  simulator.ResetState();

// Emits the test prologue: selects csp as the stack pointer, saves the
// callee-saved registers and emits a Debug marker that enables tracing.
#define START_AFTER_RESET()      \
  __ SetStackPointer(csp);       \
  __ PushCalleeSavedRegisters(); \
  __ Debug("Start test.", __LINE__, TRACE_ENABLE | LOG_ALL);

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Runs the simulator from the start of the code buffer.
#define RUN() simulator.RunFrom(reinterpret_cast<Instruction*>(buf))

// Emits the test epilogue: a Debug marker that disables tracing, the register
// dump into 'core', restoration of the callee-saved registers and the return;
// then finalizes the generated code.
#define END()                                                 \
  __ Debug("End test.", __LINE__, TRACE_DISABLE | LOG_ALL);   \
  core.Dump(&masm);                                           \
  __ PopCalleeSavedRegisters();                               \
  __ Ret();                                                   \
  __ GetCode(nullptr);

// Releases the code buffer allocated by SETUP_SIZE.
#define TEARDOWN() delete[] buf;

#else  // ifdef USE_SIMULATOR.
// Run the test on real hardware or models.
//
// SETUP_SIZE declares, in the enclosing scope: isolate, scope, actual_size,
// buf (obtained from v8::base::OS::Allocate), masm and core. The isolate is
// checked before anything is constructed from it.
#define SETUP_SIZE(buf_size)                                   \
  Isolate* isolate = CcTest::i_isolate();                      \
  CHECK(isolate != nullptr);                                   \
  HandleScope scope(isolate);                                  \
  size_t actual_size;                                          \
  byte* buf = static_cast<byte*>(                              \
      v8::base::OS::Allocate(buf_size, &actual_size, true));   \
  MacroAssembler masm(isolate, buf, actual_size,               \
                      v8::internal::CodeObjectRequired::kYes); \
  RegisterDump core;

#define RESET()                                                \
  __ Reset();                                                  \
  /* Reset the machine state (like simulator.ResetState()). */ \
  __ Msr(NZCV, xzr);                                           \
  __ Msr(FPCR, xzr);

// Emits the test prologue: selects csp as the stack pointer and saves the
// callee-saved registers.
#define START_AFTER_RESET() \
  __ SetStackPointer(csp);  \
  __ PushCalleeSavedRegisters();

#define START() \
  RESET();      \
  START_AFTER_RESET();

// Flushes the instruction cache for the generated code, then calls the start
// of the buffer as a void(void) function. The function pointer is produced
// with memcpy from the buffer pointer.
#define RUN()                                                       \
  Assembler::FlushICache(isolate, buf, masm.SizeOfGeneratedCode()); \
  {                                                                 \
    void (*test_function)(void);                                    \
    memcpy(&test_function, &buf, sizeof(buf));                      \
    test_function();                                                \
  }

// Emits the test epilogue: the register dump into 'core', restoration of the
// callee-saved registers and the return; then finalizes the generated code.
#define END()                   \
  core.Dump(&masm);             \
  __ PopCalleeSavedRegisters(); \
  __ Ret();                     \
  __ GetCode(nullptr);

// Releases the code buffer allocated by SETUP_SIZE.
#define TEARDOWN() v8::base::OS::Free(buf, actual_size);

#endif  // ifdef USE_SIMULATOR.

181

// Assertion helpers. Except for CHECK_LITERAL_POOL_SIZE, they all refer to a
// RegisterDump named 'core' in the calling scope (declared by SETUP_SIZE).

// Checks the dumped NZCV flags against 'expected'.
182 #define CHECK_EQUAL_NZCV(expected) CHECK(EqualNzcv(expected, core.flags_nzcv()))

183

// Checks the whole dump against another dump; the address of 'expected' is
// taken, so it must be an lvalue.
184 #define CHECK_EQUAL_REGISTERS(expected) CHECK(EqualRegisters(&expected, &core))

185

// 'expected' is converted to uint32_t before the comparison.
186 #define CHECK_EQUAL_32(expected, result) \

187 CHECK(Equal32(static_cast<uint32_t>(expected), &core, result))

188

189 #define CHECK_EQUAL_FP32(expected, result) \

190 CHECK(EqualFP32(expected, &core, result))

191

192 #define CHECK_EQUAL_64(expected, result) CHECK(Equal64(expected, &core, result))

193

194 #define CHECK_EQUAL_FP64(expected, result) \

195 CHECK(EqualFP64(expected, &core, result))

196

// The literal pool size is only checked when DEBUG is defined; otherwise the
// macro expands to a no-op expression.
197 #ifdef DEBUG

198 #define CHECK_LITERAL_POOL_SIZE(expected) \

199 CHECK((expected) == (__ LiteralPoolSize()))

200 #else

201 #define CHECK_LITERAL_POOL_SIZE(expected) ((void)0)

202 #endif

203

204 // The maximum number of errors to report in detail for each test.

205 static const unsigned kErrorReportLimit = 8;

206

207 typedef void (MacroAssembler::*Test1OpNEONHelper_t)(const VRegister& vd,

208 const VRegister& vn);

209 typedef void (MacroAssembler::*Test2OpNEONHelper_t)(const VRegister& vd,

210 const VRegister& vn,

211 const VRegister& vm);

212 typedef void (MacroAssembler::*TestByElementNEONHelper_t)(const VRegister& vd,

213 const VRegister& vn,

214 const VRegister& vm,

215 int vm_index);

216 typedef void (MacroAssembler::*TestOpImmOpImmVdUpdateNEONHelper_t)(

217 const VRegister& vd, int imm1, const VRegister& vn, int imm2);

218

219 // This helps using the same typename for both the function pointer

220 // and the array of immediates passed to helper routines.

221 template <typename T>

222 class Test2OpImmediateNEONHelper_t {

223 public:

224 typedef void (MacroAssembler::*mnemonic)(const VRegister& vd,

225 const VRegister& vn, T imm);

226 };

227

228 namespace {

229

// Maximum number of hex characters required to represent values of either
// templated type, i.e. two hex digits per byte of the wider type.
template <typename Ta, typename Tb>
unsigned MaxHexCharCount() {
  const unsigned bits_per_byte = 8;
  const unsigned bits_per_hex_digit = 4;
  // A plain conditional is used instead of std::max so that this helper does
  // not depend on <algorithm> being included transitively.
  const unsigned widest_bytes = static_cast<unsigned>(
      (sizeof(Ta) > sizeof(Tb)) ? sizeof(Ta) : sizeof(Tb));
  return (widest_bytes * bits_per_byte) / bits_per_hex_digit;
}

237

238 // ==== Tests for instructions of the form <INST> VReg, VReg. ====

239

240 void Test1OpNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n,

241 unsigned inputs_n_length, uintptr_t results,

242 VectorFormat vd_form, VectorFormat vn_form) {

243 DCHECK_NE(vd_form, kFormatUndefined);

244 DCHECK_NE(vn_form, kFormatUndefined);

245

246 SETUP();

247 START();

248

249 // Roll up the loop to keep the code size down.

250 Label loop_n;

251

252 Register out = x0;

253 Register inputs_n_base = x1;

254 Register inputs_n_last_16bytes = x3;

255 Register index_n = x5;

256

257 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

258 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

259

260 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);

261 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

262 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

263 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);

264 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

265

266 // These will be either a D- or a Q-register form, with a single lane

267 // (for use in scalar load and store operations).

268 VRegister vd = VRegister::Create(0, vd_bits);

269 VRegister vn = v1.V16B();

270 VRegister vntmp = v3.V16B();

271

272 // These will have the correct format for use when calling 'helper'.

273 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count);

274 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);

275

276 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.

277 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);

278

279 __ Mov(out, results);

280

281 __ Mov(inputs_n_base, inputs_n);

282 __ Mov(inputs_n_last_16bytes,

283 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

284

285 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

286

287 __ Mov(index_n, 0);

288 __ Bind(&loop_n);

289

290 __ Ldr(vntmp_single,

291 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));

292 __ Ext(vn, vn, vntmp, vn_lane_bytes);

293

294 // Set the destination to zero.

295

296 // TODO(all): Setting the destination to values other than zero might be a

297 // better test for instructions such as sqxtn2 which may leave parts of V

298 // registers unchanged.

299 __ Movi(vd.V16B(), 0);

300

301 (masm.*helper)(vd_helper, vn_helper);

302

303 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));

304

305 __ Add(index_n, index_n, 1);

306 __ Cmp(index_n, inputs_n_length);

307 __ B(lo, &loop_n);

308

309 END();

310 RUN();

311 TEARDOWN();

312 }

313

314 // Test NEON instructions. The inputs_*[] and expected[] arrays should be

315 // arrays of rawbit representation of input values. This ensures that

316 // exact bit comparisons can be performed.

317 template <typename Td, typename Tn>

318 void Test1OpNEON(const char* name, Test1OpNEONHelper_t helper,

319 const Tn inputs_n[], unsigned inputs_n_length,

320 const Td expected[], unsigned expected_length,

321 VectorFormat vd_form, VectorFormat vn_form) {

322 DCHECK_GT(inputs_n_length, 0U);

323

324 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

325 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

326 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

327

328 const unsigned results_length = inputs_n_length;

329 std::vector<Td> results(results_length * vd_lane_count, 0);

330 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

331

332 Test1OpNEON_Helper(

333 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,

334 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form);

335

336 // Check the results.

337 CHECK(expected_length == results_length);

338 unsigned error_count = 0;

339 unsigned d = 0;

340 const char* padding = " ";

341 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1));

342 for (unsigned n = 0; n < inputs_n_length; n++, d++) {

343 bool error_in_vector = false;

344

345 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

346 unsigned output_index = (n * vd_lane_count) + lane;

347

348 if (results[output_index] != expected[output_index]) {

349 error_in_vector = true;

350 break;

351 }

352 }

353

354 if (error_in_vector && (++error_count <= kErrorReportLimit)) {

355 printf("%s\n", name);

356 printf(" Vn%.s\| Vd%.s\| Expected\n", lane_len_in_hex + 1, padding,

357 lane_len_in_hex + 1, padding);

358

359 const unsigned first_index_n =

360 inputs_n_length - (16 / vn_lane_bytes) + n + 1;

361

362 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);

363 lane++) {

364 unsigned output_index = (n * vd_lane_count) + lane;

365 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

366

367 printf("%c0x%0" PRIx64 " \| 0x%0" PRIx64

368 " "

369 "\| 0x%0*" PRIx64 "\n",

370 results[output_index] != expected[output_index] ? '*' : ' ',

371 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),

372 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),

373 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));

374 }

375 }

376 }

377 DCHECK_EQ(d, expected_length);

378 if (error_count > kErrorReportLimit) {

379 printf("%u other errors follow.\n", error_count - kErrorReportLimit);

380 }

381 DCHECK_EQ(error_count, 0U);

382 }

383

384 // ==== Tests for instructions of the form <mnemonic> <V><d>, <Vn>.<T> ====

385 // where <V> is one of B, H, S or D registers.

386 // e.g. saddlv H1, v0.8B

387

388 // TODO(all): Change tests to store all lanes of the resulting V register.

389 // Some tests store all 128 bits of the resulting V register to

390 // check the simulator's behaviour on the rest of the register.

391 // This is better than storing the affected lanes only.

392 // Change any tests such as the 'Across' template to do the same.

393

394 void Test1OpAcrossNEON_Helper(Test1OpNEONHelper_t helper, uintptr_t inputs_n,

395 unsigned inputs_n_length, uintptr_t results,

396 VectorFormat vd_form, VectorFormat vn_form) {

397 DCHECK_NE(vd_form, kFormatUndefined);

398 DCHECK_NE(vn_form, kFormatUndefined);

399

400 SETUP();

401 START();

402

403 // Roll up the loop to keep the code size down.

404 Label loop_n;

405

406 Register out = x0;

407 Register inputs_n_base = x1;

408 Register inputs_n_last_vector = x3;

409 Register index_n = x5;

410

411 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

412 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);

413 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

414 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

415 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);

416 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

417

418 // Test destructive operations by (arbitrarily) using the same register for

419 // B and S lane sizes.

420 bool destructive = (vd_bits == kBRegSize) \|\| (vd_bits == kSRegSize);

421

422 // These will be either a D- or a Q-register form, with a single lane

423 // (for use in scalar load and store operations).

424 // Create two aliases for v8; the first is the destination for the tested

425 // instruction, the second, the whole Q register to check the results.

426 VRegister vd = VRegister::Create(0, vd_bits);

427 VRegister vdstr = VRegister::Create(0, kQRegSizeInBits);

428

429 VRegister vn = VRegister::Create(1, vn_bits);

430 VRegister vntmp = VRegister::Create(3, vn_bits);

431

432 // These will have the correct format for use when calling 'helper'.

433 VRegister vd_helper = VRegister::Create(0, vn_bits, vn_lane_count);

434 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);

435

436 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.

437 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);

438

439 // Same registers for use in the 'ext' instructions.

440 VRegister vn_ext = (kDRegSizeInBits == vn_bits) ? vn.V8B() : vn.V16B();

441 VRegister vntmp_ext =

442 (kDRegSizeInBits == vn_bits) ? vntmp.V8B() : vntmp.V16B();

443

444 __ Mov(out, results);

445

446 __ Mov(inputs_n_base, inputs_n);

447 __ Mov(inputs_n_last_vector,

448 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

449

450 __ Ldr(vn, MemOperand(inputs_n_last_vector));

451

452 __ Mov(index_n, 0);

453 __ Bind(&loop_n);

454

455 __ Ldr(vntmp_single,

456 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));

457 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

458

459 if (destructive) {

460 __ Mov(vd_helper, vn_helper);

461 (masm.*helper)(vd, vd_helper);

462 } else {

463 (masm.*helper)(vd, vn_helper);

464 }

465

466 __ Str(vdstr, MemOperand(out, kQRegSize, PostIndex));

467

468 __ Add(index_n, index_n, 1);

469 __ Cmp(index_n, inputs_n_length);

470 __ B(lo, &loop_n);

471

472 END();

473 RUN();

474 TEARDOWN();

475 }

476

477 // Test NEON instructions. The inputs_*[] and expected[] arrays should be

478 // arrays of rawbit representation of input values. This ensures that

479 // exact bit comparisons can be performed.

480 template <typename Td, typename Tn>

481 void Test1OpAcrossNEON(const char* name, Test1OpNEONHelper_t helper,

482 const Tn inputs_n[], unsigned inputs_n_length,

483 const Td expected[], unsigned expected_length,

484 VectorFormat vd_form, VectorFormat vn_form) {

485 DCHECK_GT(inputs_n_length, 0U);

486

487 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

488 const unsigned vd_lanes_per_q = MaxLaneCountFromFormat(vd_form);

489

490 const unsigned results_length = inputs_n_length;

491 std::vector<Td> results(results_length * vd_lanes_per_q, 0);

492 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

493

494 Test1OpAcrossNEON_Helper(

495 helper, reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,

496 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form);

497

498 // Check the results.

499 DCHECK_EQ(expected_length, results_length);

500 unsigned error_count = 0;

501 unsigned d = 0;

502 const char* padding = " ";

503 DCHECK_GE(strlen(padding), (lane_len_in_hex + 1));

504 for (unsigned n = 0; n < inputs_n_length; n++, d++) {

505 bool error_in_vector = false;

506

507 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

508 unsigned expected_index = (n * vd_lane_count) + lane;

509 unsigned results_index = (n * vd_lanes_per_q) + lane;

510

511 if (results[results_index] != expected[expected_index]) {

512 error_in_vector = true;

513 break;

514 }

515

516 // For across operations, the remaining lanes should be zero.

517 for (unsigned lane = vd_lane_count; lane < vd_lanes_per_q; lane++) {

518 unsigned results_index = (n * vd_lanes_per_q) + lane;

519 if (results[results_index] != 0) {

520 error_in_vector = true;

521 break;

522 }

523 }

524 }

525

526 if (error_in_vector && (++error_count <= kErrorReportLimit)) {

527 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

528

529 printf("%s\n", name);

530 printf(" Vn%.s\| Vd%.s\| Expected\n", lane_len_in_hex + 1, padding,

531 lane_len_in_hex + 1, padding);

532

533 for (unsigned lane = 0; lane < vn_lane_count; lane++) {

534 unsigned results_index =

535 (n * vd_lanes_per_q) + ((vn_lane_count - 1) - lane);

536 unsigned input_index_n =

537 (inputs_n_length - vn_lane_count + n + 1 + lane) % inputs_n_length;

538

539 Td expect = 0;

540 if ((vn_lane_count - 1) == lane) {

541 // This is the last lane to be printed, ie. the least-significant

542 // lane, so use the expected value; any other lane should be zero.

543 unsigned expected_index = n * vd_lane_count;

544 expect = expected[expected_index];

545 }

546 printf("%c0x%0" PRIx64 " \| 0x%0" PRIx64 " \| 0x%0*" PRIx64 "\n",

547 results[results_index] != expect ? '*' : ' ', lane_len_in_hex,

548 static_cast<uint64_t>(inputs_n[input_index_n]), lane_len_in_hex,

549 static_cast<uint64_t>(results[results_index]), lane_len_in_hex,

550 static_cast<uint64_t>(expect));

551 }

552 }

553 }

554 DCHECK_EQ(d, expected_length);

555 if (error_count > kErrorReportLimit) {

556 printf("%u other errors follow.\n", error_count - kErrorReportLimit);

557 }

558 DCHECK_EQ(error_count, 0U);

559 }

560

561 // ==== Tests for instructions of the form <INST> VReg, VReg, VReg. ====

562

563 void Test2OpNEON_Helper(Test2OpNEONHelper_t helper, uintptr_t inputs_d,

564 uintptr_t inputs_n, unsigned inputs_n_length,

565 uintptr_t inputs_m, unsigned inputs_m_length,

566 uintptr_t results, VectorFormat vd_form,

567 VectorFormat vn_form, VectorFormat vm_form) {

568 DCHECK_NE(vd_form, kFormatUndefined);

569 DCHECK_NE(vn_form, kFormatUndefined);

570 DCHECK_NE(vm_form, kFormatUndefined);

571

572 SETUP();

573 START();

574

575 // Roll up the loop to keep the code size down.

576 Label loop_n, loop_m;

577

578 Register out = x0;

579 Register inputs_n_base = x1;

580 Register inputs_m_base = x2;

581 Register inputs_d_base = x3;

582 Register inputs_n_last_16bytes = x4;

583 Register inputs_m_last_16bytes = x5;

584 Register index_n = x6;

585 Register index_m = x7;

586

587 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

588 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

589

590 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);

591 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

592 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

593 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);

594 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

595

596 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);

597 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);

598 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);

599 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);

600 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);

601

602 // Always load and store 128 bits regardless of the format.

603 VRegister vd = v0.V16B();

604 VRegister vn = v1.V16B();

605 VRegister vm = v2.V16B();

606 VRegister vntmp = v3.V16B();

607 VRegister vmtmp = v4.V16B();

608 VRegister vres = v5.V16B();

609

610 // These will have the correct format for calling the 'helper'.

611 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);

612 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count);

613 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);

614

615 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.

616 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);

617 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits);

618

619 __ Mov(out, results);

620

621 __ Mov(inputs_d_base, inputs_d);

622

623 __ Mov(inputs_n_base, inputs_n);

624 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));

625 __ Mov(inputs_m_base, inputs_m);

626 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

627

628 __ Ldr(vd, MemOperand(inputs_d_base));

629 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

630 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

631

632 __ Mov(index_n, 0);

633 __ Bind(&loop_n);

634

635 __ Ldr(vntmp_single,

636 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));

637 __ Ext(vn, vn, vntmp, vn_lane_bytes);

638

639 __ Mov(index_m, 0);

640 __ Bind(&loop_m);

641

642 __ Ldr(vmtmp_single,

643 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));

644 __ Ext(vm, vm, vmtmp, vm_lane_bytes);

645

646 __ Mov(vres, vd);

647

648 (masm.*helper)(vres_helper, vn_helper, vm_helper);

649

650 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));

651

652 __ Add(index_m, index_m, 1);

653 __ Cmp(index_m, inputs_m_length);

654 __ B(lo, &loop_m);

655

656 __ Add(index_n, index_n, 1);

657 __ Cmp(index_n, inputs_n_length);

658 __ B(lo, &loop_n);

659

660 END();

661 RUN();

662 TEARDOWN();

663 }

664

665 // Test NEON instructions. The inputs_*[] and expected[] arrays should be

666 // arrays of rawbit representation of input values. This ensures that

667 // exact bit comparisons can be performed.

668 template <typename Td, typename Tn, typename Tm>

669 void Test2OpNEON(const char* name, Test2OpNEONHelper_t helper,

670 const Td inputs_d[], const Tn inputs_n[],

671 unsigned inputs_n_length, const Tm inputs_m[],

672 unsigned inputs_m_length, const Td expected[],

673 unsigned expected_length, VectorFormat vd_form,

674 VectorFormat vn_form, VectorFormat vm_form) {

675 DCHECK(inputs_n_length > 0 && inputs_m_length > 0);

676

677 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

678

679 const unsigned results_length = inputs_n_length * inputs_m_length;

680 std::vector<Td> results(results_length * vd_lane_count);

681 const unsigned lane_len_in_hex =

682 static_cast<unsigned>(std::max(sizeof(Td), sizeof(Tm)) * 8) / 4;

683

684 Test2OpNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_d),

685 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,

686 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length,

687 reinterpret_cast<uintptr_t>(results.data()), vd_form,

688 vn_form, vm_form);

689

690 // Check the results.

691 CHECK(expected_length == results_length);

692 unsigned error_count = 0;

693 unsigned d = 0;

694 const char* padding = " ";

695 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);

696 for (unsigned n = 0; n < inputs_n_length; n++) {

697 for (unsigned m = 0; m < inputs_m_length; m++, d++) {

698 bool error_in_vector = false;

699

700 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

701 unsigned output_index =

702 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane;

703

704 if (results[output_index] != expected[output_index]) {

705 error_in_vector = true;

706 break;

707 }

708 }

709

710 if (error_in_vector && (++error_count <= kErrorReportLimit)) {

711 printf("%s\n", name);

712 printf(" Vd%.s\| Vn%.s\| Vm%.s\| Vd%.s\| Expected\n",

713 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding,

714 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding);

715

716 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

717 unsigned output_index = (n * inputs_m_length * vd_lane_count) +

718 (m * vd_lane_count) + lane;

719 unsigned input_index_n =

720 (inputs_n_length - vd_lane_count + n + 1 + lane) %

721 inputs_n_length;

722 unsigned input_index_m =

723 (inputs_m_length - vd_lane_count + m + 1 + lane) %

724 inputs_m_length;

725

726 printf(

727 "%c0x%0" PRIx64 " \| 0x%0" PRIx64 " \| 0x%0*" PRIx64

728 " "

729 "\| 0x%0" PRIx64 " \| 0x%0" PRIx64 "\n",

730 results[output_index] != expected[output_index] ? '*' : ' ',

731 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),

732 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),

733 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),

734 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),

735 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));

736 }

737 }

738 }

739 }

740 DCHECK_EQ(d, expected_length);

741 if (error_count > kErrorReportLimit) {

742 printf("%u other errors follow.\n", error_count - kErrorReportLimit);

743 }

744 DCHECK_EQ(error_count, 0U);

745 }

746

747 // ==== Tests for instructions of the form <INST> Vd, Vn, Vm[<#index>]. ====

748

749 void TestByElementNEON_Helper(TestByElementNEONHelper_t helper,

750 uintptr_t inputs_d, uintptr_t inputs_n,

751 unsigned inputs_n_length, uintptr_t inputs_m,

752 unsigned inputs_m_length, const int indices[],

753 unsigned indices_length, uintptr_t results,

754 VectorFormat vd_form, VectorFormat vn_form,

755 VectorFormat vm_form) {

756 DCHECK_NE(vd_form, kFormatUndefined);

757 DCHECK_NE(vn_form, kFormatUndefined);

758 DCHECK_NE(vm_form, kFormatUndefined);

759

760 SETUP();

761 START();

762

763 // Roll up the loop to keep the code size down.

764 Label loop_n, loop_m;

765

766 Register out = x0;

767 Register inputs_n_base = x1;

768 Register inputs_m_base = x2;

769 Register inputs_d_base = x3;

770 Register inputs_n_last_16bytes = x4;

771 Register inputs_m_last_16bytes = x5;

772 Register index_n = x6;

773 Register index_m = x7;

774

775 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

776 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

777

778 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);

779 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

780 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

781 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);

782 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

783

784 const unsigned vm_bits = RegisterSizeInBitsFromFormat(vm_form);

785 const unsigned vm_lane_count = LaneCountFromFormat(vm_form);

786 const unsigned vm_lane_bytes = LaneSizeInBytesFromFormat(vm_form);

787 const unsigned vm_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vm_form);

788 const unsigned vm_lane_bits = LaneSizeInBitsFromFormat(vm_form);

789

790 // Always load and store 128 bits regardless of the format.

791 VRegister vd = v0.V16B();

792 VRegister vn = v1.V16B();

793 VRegister vm = v2.V16B();

794 VRegister vntmp = v3.V16B();

795 VRegister vmtmp = v4.V16B();

796 VRegister vres = v5.V16B();

797

798 // These will have the correct format for calling the 'helper'.

799 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);

800 VRegister vm_helper = VRegister::Create(2, vm_bits, vm_lane_count);

801 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);

802

803 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.

804 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);

805 VRegister vmtmp_single = VRegister::Create(4, vm_lane_bits);

806

807 __ Mov(out, results);

808

809 __ Mov(inputs_d_base, inputs_d);

810

811 __ Mov(inputs_n_base, inputs_n);

812 __ Mov(inputs_n_last_16bytes, inputs_n + (inputs_n_length - 16));

813 __ Mov(inputs_m_base, inputs_m);

814 __ Mov(inputs_m_last_16bytes, inputs_m + (inputs_m_length - 16));

815

816 __ Ldr(vd, MemOperand(inputs_d_base));

817 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

818 __ Ldr(vm, MemOperand(inputs_m_last_16bytes));

819

820 __ Mov(index_n, 0);

821 __ Bind(&loop_n);

822

823 __ Ldr(vntmp_single,

824 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));

825 __ Ext(vn, vn, vntmp, vn_lane_bytes);

826

827 __ Mov(index_m, 0);

828 __ Bind(&loop_m);

829

830 __ Ldr(vmtmp_single,

831 MemOperand(inputs_m_base, index_m, LSL, vm_lane_bytes_log2));

832 __ Ext(vm, vm, vmtmp, vm_lane_bytes);

833

834 __ Mov(vres, vd);

835 {

836 for (unsigned i = 0; i < indices_length; i++) {

837 (masm.*helper)(vres_helper, vn_helper, vm_helper, indices[i]);

838 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));

839 }

840 }

841

842 __ Add(index_m, index_m, 1);

843 __ Cmp(index_m, inputs_m_length);

844 __ B(lo, &loop_m);

845

846 __ Add(index_n, index_n, 1);

847 __ Cmp(index_n, inputs_n_length);

848 __ B(lo, &loop_n);

849

850 END();

851 RUN();

852 TEARDOWN();

853 }

854

855 // Test NEON instructions. The inputs_*[] and expected[] arrays should be

856 // arrays of rawbit representation of input values. This ensures that

857 // exact bit comparisons can be performed.

858 template <typename Td, typename Tn, typename Tm>

859 void TestByElementNEON(const char* name, TestByElementNEONHelper_t helper,

860 const Td inputs_d[], const Tn inputs_n[],

861 unsigned inputs_n_length, const Tm inputs_m[],

862 unsigned inputs_m_length, const int indices[],

863 unsigned indices_length, const Td expected[],

864 unsigned expected_length, VectorFormat vd_form,

865 VectorFormat vn_form, VectorFormat vm_form) {

866 DCHECK_GT(inputs_n_length, 0U);

867 DCHECK_GT(inputs_m_length, 0U);

868 DCHECK_GT(indices_length, 0U);

869

870 const unsigned vd_lane_count = MaxLaneCountFromFormat(vd_form);

871

872 const unsigned results_length =

873 inputs_n_length * inputs_m_length * indices_length;

874 std::vector<Td> results(results_length * vd_lane_count, 0);

875 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tm>();

876

877 TestByElementNEON_Helper(

878 helper, reinterpret_cast<uintptr_t>(inputs_d),

879 reinterpret_cast<uintptr_t>(inputs_n), inputs_n_length,

880 reinterpret_cast<uintptr_t>(inputs_m), inputs_m_length, indices,

881 indices_length, reinterpret_cast<uintptr_t>(results.data()), vd_form,

882 vn_form, vm_form);

883

884 // Check the results.

885 CHECK(expected_length == results_length);

886 unsigned error_count = 0;

887 unsigned d = 0;

888 const char* padding = " ";

889 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);

890 for (unsigned n = 0; n < inputs_n_length; n++) {

891 for (unsigned m = 0; m < inputs_m_length; m++) {

892 for (unsigned index = 0; index < indices_length; index++, d++) {

893 bool error_in_vector = false;

894

895 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

896 unsigned output_index =

897 (n * inputs_m_length * indices_length * vd_lane_count) +

898 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +

899 lane;

900

901 if (results[output_index] != expected[output_index]) {

902 error_in_vector = true;

903 break;

904 }

905 }

906

907 if (error_in_vector && (++error_count <= kErrorReportLimit)) {

908 printf("%s\n", name);

909 printf(" Vd%.s\| Vn%.s\| Vm%.s\| Index \| Vd%.s\| Expected\n",

910 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding,

911 lane_len_in_hex + 1, padding, lane_len_in_hex + 1, padding);

912

913 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

914 unsigned output_index =

915 (n * inputs_m_length * indices_length * vd_lane_count) +

916 (m * indices_length * vd_lane_count) + (index * vd_lane_count) +

917 lane;

918 unsigned input_index_n =

919 (inputs_n_length - vd_lane_count + n + 1 + lane) %

920 inputs_n_length;

921 unsigned input_index_m =

922 (inputs_m_length - vd_lane_count + m + 1 + lane) %

923 inputs_m_length;

924

925 printf(

926 "%c0x%0" PRIx64 " \| 0x%0" PRIx64 " \| 0x%0*" PRIx64

927 " "

928 "\| [%3d] \| 0x%0" PRIx64 " \| 0x%0" PRIx64 "\n",

929 results[output_index] != expected[output_index] ? '*' : ' ',

930 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),

931 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),

932 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),

933 indices[index], lane_len_in_hex,

934 static_cast<uint64_t>(results[output_index]), lane_len_in_hex,

935 static_cast<uint64_t>(expected[output_index]));

936 }

937 }

938 }

939 }

940 }

941 DCHECK_EQ(d, expected_length);

942 if (error_count > kErrorReportLimit) {

943 printf("%u other errors follow.\n", error_count - kErrorReportLimit);

944 }

945 CHECK(error_count == 0);

946 }

947

948 // ==== Tests for instructions of the form <INST> VReg, VReg, #Immediate. ====

949

950 template <typename Tm>

951 void Test2OpImmNEON_Helper(

952 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,

953 uintptr_t inputs_n, unsigned inputs_n_length, const Tm inputs_m[],

954 unsigned inputs_m_length, uintptr_t results, VectorFormat vd_form,

955 VectorFormat vn_form) {

956 DCHECK(vd_form != kFormatUndefined && vn_form != kFormatUndefined);

957

958 SETUP();

959 START();

960

961 // Roll up the loop to keep the code size down.

962 Label loop_n;

963

964 Register out = x0;

965 Register inputs_n_base = x1;

966 Register inputs_n_last_16bytes = x3;

967 Register index_n = x5;

968

969 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

970 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

971

972 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);

973 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

974 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

975 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);

976 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

977

978 // These will be either a D- or a Q-register form, with a single lane

979 // (for use in scalar load and store operations).

980 VRegister vd = VRegister::Create(0, vd_bits);

981 VRegister vn = v1.V16B();

982 VRegister vntmp = v3.V16B();

983

984 // These will have the correct format for use when calling 'helper'.

985 VRegister vd_helper = VRegister::Create(0, vd_bits, vd_lane_count);

986 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);

987

988 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.

989 VRegister vntmp_single = VRegister::Create(3, vn_lane_bits);

990

991 __ Mov(out, results);

992

993 __ Mov(inputs_n_base, inputs_n);

994 __ Mov(inputs_n_last_16bytes,

995 inputs_n + (vn_lane_bytes * inputs_n_length) - 16);

996

997 __ Ldr(vn, MemOperand(inputs_n_last_16bytes));

998

999 __ Mov(index_n, 0);

1000 __ Bind(&loop_n);

1001

1002 __ Ldr(vntmp_single,

1003 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));

1004 __ Ext(vn, vn, vntmp, vn_lane_bytes);

1005

1006 // Set the destination to zero for tests such as '[r]shrn2'.

1007 // TODO(all): Setting the destination to values other than zero might be a

1008 // better test for shift and accumulate instructions (srsra/ssra/usra/ursra).

1009 __ Movi(vd.V16B(), 0);

1010

1011 {

1012 for (unsigned i = 0; i < inputs_m_length; i++) {

1013 (masm.*helper)(vd_helper, vn_helper, inputs_m[i]);

1014 __ Str(vd, MemOperand(out, vd.SizeInBytes(), PostIndex));

1015 }

1016 }

1017

1018 __ Add(index_n, index_n, 1);

1019 __ Cmp(index_n, inputs_n_length);

1020 __ B(lo, &loop_n);

1021

1022 END();

1023 RUN();

1024 TEARDOWN();

1025 }

1026

1027 // Test NEON instructions. The inputs_*[] and expected[] arrays should be

1028 // arrays of rawbit representation of input values. This ensures that

1029 // exact bit comparisons can be performed.

1030 template <typename Td, typename Tn, typename Tm>

1031 void Test2OpImmNEON(const char* name,

1032 typename Test2OpImmediateNEONHelper_t<Tm>::mnemonic helper,

1033 const Tn inputs_n[], unsigned inputs_n_length,

1034 const Tm inputs_m[], unsigned inputs_m_length,

1035 const Td expected[], unsigned expected_length,

1036 VectorFormat vd_form, VectorFormat vn_form) {

1037 DCHECK(inputs_n_length > 0 && inputs_m_length > 0);

1038

1039 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

1040 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

1041 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

1042

1043 const unsigned results_length = inputs_n_length * inputs_m_length;

1044 std::vector<Td> results(results_length * vd_lane_count, 0);

1045 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

1046

1047 Test2OpImmNEON_Helper(helper, reinterpret_cast<uintptr_t>(inputs_n),

1048 inputs_n_length, inputs_m, inputs_m_length,

1049 reinterpret_cast<uintptr_t>(results.data()), vd_form,

1050 vn_form);

1051

1052 // Check the results.

1053 CHECK(expected_length == results_length);

1054 unsigned error_count = 0;

1055 unsigned d = 0;

1056 const char* padding = " ";

1057 DCHECK_GE(strlen(padding), lane_len_in_hex + 1);

1058 for (unsigned n = 0; n < inputs_n_length; n++) {

1059 for (unsigned m = 0; m < inputs_m_length; m++, d++) {

1060 bool error_in_vector = false;

1061

1062 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

1063 unsigned output_index =

1064 (n * inputs_m_length * vd_lane_count) + (m * vd_lane_count) + lane;

1065

1066 if (results[output_index] != expected[output_index]) {

1067 error_in_vector = true;

1068 break;

1069 }

1070 }

1071

1072 if (error_in_vector && (++error_count <= kErrorReportLimit)) {

1073 printf("%s\n", name);

1074 printf(" Vn%.s\| Imm%.s\| Vd%.*s\| Expected\n", lane_len_in_hex + 1,

1075 padding, lane_len_in_hex, padding, lane_len_in_hex + 1, padding);

1076

1077 const unsigned first_index_n =

1078 inputs_n_length - (16 / vn_lane_bytes) + n + 1;

1079

1080 for (unsigned lane = 0; lane < std::max(vd_lane_count, vn_lane_count);

1081 lane++) {

1082 unsigned output_index = (n * inputs_m_length * vd_lane_count) +

1083 (m * vd_lane_count) + lane;

1084 unsigned input_index_n = (first_index_n + lane) % inputs_n_length;

1085 unsigned input_index_m = m;

1086

1087 printf(

1088 "%c0x%0" PRIx64 " \| 0x%0" PRIx64

1089 " "

1090 "\| 0x%0" PRIx64 " \| 0x%0" PRIx64 "\n",

1091 results[output_index] != expected[output_index] ? '*' : ' ',

1092 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),

1093 lane_len_in_hex, static_cast<uint64_t>(inputs_m[input_index_m]),

1094 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),

1095 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));

1096 }

1097 }

1098 }

1099 }

1100 DCHECK_EQ(d, expected_length);

1101 if (error_count > kErrorReportLimit) {

1102 printf("%u other errors follow.\n", error_count - kErrorReportLimit);

1103 }

1104 CHECK(error_count == 0);

1105 }

1106

1107 // ==== Tests for instructions of the form <INST> VReg, #Imm, VReg, #Imm. ====

1108

1109 void TestOpImmOpImmNEON_Helper(TestOpImmOpImmVdUpdateNEONHelper_t helper,

1110 uintptr_t inputs_d, const int inputs_imm1[],

1111 unsigned inputs_imm1_length, uintptr_t inputs_n,

1112 unsigned inputs_n_length,

1113 const int inputs_imm2[],

1114 unsigned inputs_imm2_length, uintptr_t results,

1115 VectorFormat vd_form, VectorFormat vn_form) {

1116 DCHECK_NE(vd_form, kFormatUndefined);

1117 DCHECK_NE(vn_form, kFormatUndefined);

1118

1119 SETUP();

1120 START();

1121

1122 // Roll up the loop to keep the code size down.

1123 Label loop_n;

1124

1125 Register out = x0;

1126 Register inputs_d_base = x1;

1127 Register inputs_n_base = x2;

1128 Register inputs_n_last_vector = x4;

1129 Register index_n = x6;

1130

1131 const unsigned vd_bits = RegisterSizeInBitsFromFormat(vd_form);

1132 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

1133

1134 const unsigned vn_bits = RegisterSizeInBitsFromFormat(vn_form);

1135 const unsigned vn_lane_count = LaneCountFromFormat(vn_form);

1136 const unsigned vn_lane_bytes = LaneSizeInBytesFromFormat(vn_form);

1137 const unsigned vn_lane_bytes_log2 = LaneSizeInBytesLog2FromFormat(vn_form);

1138 const unsigned vn_lane_bits = LaneSizeInBitsFromFormat(vn_form);

1139

1140 // These will be either a D- or a Q-register form, with a single lane

1141 // (for use in scalar load and store operations).

1142 VRegister vd = VRegister::Create(0, vd_bits);

1143 VRegister vn = VRegister::Create(1, vn_bits);

1144 VRegister vntmp = VRegister::Create(4, vn_bits);

1145 VRegister vres = VRegister::Create(5, vn_bits);

1146

1147 VRegister vn_helper = VRegister::Create(1, vn_bits, vn_lane_count);

1148 VRegister vres_helper = VRegister::Create(5, vd_bits, vd_lane_count);

1149

1150 // 'v*tmp_single' will be either 'Vt.B', 'Vt.H', 'Vt.S' or 'Vt.D'.

1151 VRegister vntmp_single = VRegister::Create(4, vn_lane_bits);

1152

1153 // Same registers for use in the 'ext' instructions.

1154 VRegister vn_ext = (kDRegSize == vn_bits) ? vn.V8B() : vn.V16B();

1155 VRegister vntmp_ext = (kDRegSize == vn_bits) ? vntmp.V8B() : vntmp.V16B();

1156

1157 __ Mov(out, results);

1158

1159 __ Mov(inputs_d_base, inputs_d);

1160

1161 __ Mov(inputs_n_base, inputs_n);

1162 __ Mov(inputs_n_last_vector,

1163 inputs_n + vn_lane_bytes * (inputs_n_length - vn_lane_count));

1164

1165 __ Ldr(vd, MemOperand(inputs_d_base));

1166

1167 __ Ldr(vn, MemOperand(inputs_n_last_vector));

1168

1169 __ Mov(index_n, 0);

1170 __ Bind(&loop_n);

1171

1172 __ Ldr(vntmp_single,

1173 MemOperand(inputs_n_base, index_n, LSL, vn_lane_bytes_log2));

1174 __ Ext(vn_ext, vn_ext, vntmp_ext, vn_lane_bytes);

1175

1176 for (unsigned i = 0; i < inputs_imm1_length; i++) {

1177 for (unsigned j = 0; j < inputs_imm2_length; j++) {

1178 __ Mov(vres, vd);

1179 (masm.*helper)(vres_helper, inputs_imm1[i], vn_helper, inputs_imm2[j]);

1180 __ Str(vres, MemOperand(out, vd.SizeInBytes(), PostIndex));

1181 }

1182 }

1183

1184 __ Add(index_n, index_n, 1);

1185 __ Cmp(index_n, inputs_n_length);

1186 __ B(lo, &loop_n);

1187

1188 END();

1189 RUN();

1190 TEARDOWN();

1191 }

1192

1193 // Test NEON instructions. The inputs_*[] and expected[] arrays should be

1194 // arrays of rawbit representation of input values. This ensures that

1195 // exact bit comparisons can be performed.

1196 template <typename Td, typename Tn>

1197 void TestOpImmOpImmNEON(const char* name,

1198 TestOpImmOpImmVdUpdateNEONHelper_t helper,

1199 const Td inputs_d[], const int inputs_imm1[],

1200 unsigned inputs_imm1_length, const Tn inputs_n[],

1201 unsigned inputs_n_length, const int inputs_imm2[],

1202 unsigned inputs_imm2_length, const Td expected[],

1203 unsigned expected_length, VectorFormat vd_form,

1204 VectorFormat vn_form) {

1205 DCHECK_GT(inputs_n_length, 0U);

1206 DCHECK_GT(inputs_imm1_length, 0U);

1207 DCHECK_GT(inputs_imm2_length, 0U);

1208

1209 const unsigned vd_lane_count = LaneCountFromFormat(vd_form);

1210

1211 const unsigned results_length =

1212 inputs_n_length * inputs_imm1_length * inputs_imm2_length;

1213

1214 std::vector<Td> results(results_length * vd_lane_count, 0);

1215 const unsigned lane_len_in_hex = MaxHexCharCount<Td, Tn>();

1216

1217 TestOpImmOpImmNEON_Helper(

1218 helper, reinterpret_cast<uintptr_t>(inputs_d), inputs_imm1,

1219 inputs_imm1_length, reinterpret_cast<uintptr_t>(inputs_n),

1220 inputs_n_length, inputs_imm2, inputs_imm2_length,

1221 reinterpret_cast<uintptr_t>(results.data()), vd_form, vn_form);

1222

1223 // Check the results.

1224 CHECK(expected_length == results_length);

1225 unsigned error_count = 0;

1226 unsigned counted_length = 0;

1227 const char* padding = " ";

1228 DCHECK(strlen(padding) >= (lane_len_in_hex + 1));

1229 for (unsigned n = 0; n < inputs_n_length; n++) {

1230 for (unsigned imm1 = 0; imm1 < inputs_imm1_length; imm1++) {

1231 for (unsigned imm2 = 0; imm2 < inputs_imm2_length; imm2++) {

1232 bool error_in_vector = false;

1233

1234 counted_length++;

1235

1236 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

1237 unsigned output_index =

1238 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +

1239 (imm1 * inputs_imm2_length * vd_lane_count) +

1240 (imm2 * vd_lane_count) + lane;

1241

1242 if (results[output_index] != expected[output_index]) {

1243 error_in_vector = true;

1244 break;

1245 }

1246 }

1247

1248 if (error_in_vector && (++error_count <= kErrorReportLimit)) {

1249 printf("%s\n", name);

1250 printf(" Vd%.s\| Imm%.s\| Vn%.s\| Imm%.s\| Vd%.*s\| Expected\n",

1251 lane_len_in_hex + 1, padding, lane_len_in_hex, padding,

1252 lane_len_in_hex + 1, padding, lane_len_in_hex, padding,

1253 lane_len_in_hex + 1, padding);

1254

1255 for (unsigned lane = 0; lane < vd_lane_count; lane++) {

1256 unsigned output_index =

1257 (n * inputs_imm1_length * inputs_imm2_length * vd_lane_count) +

1258 (imm1 * inputs_imm2_length * vd_lane_count) +

1259 (imm2 * vd_lane_count) + lane;

1260 unsigned input_index_n =

1261 (inputs_n_length - vd_lane_count + n + 1 + lane) %

1262 inputs_n_length;

1263 unsigned input_index_imm1 = imm1;

1264 unsigned input_index_imm2 = imm2;

1265

1266 printf(

1267 "%c0x%0" PRIx64 " \| 0x%0" PRIx64 " \| 0x%0*" PRIx64

1268 " "

1269 "\| 0x%0" PRIx64 " \| 0x%0" PRIx64 " \| 0x%0*" PRIx64 "\n",

1270 results[output_index] != expected[output_index] ? '*' : ' ',

1271 lane_len_in_hex, static_cast<uint64_t>(inputs_d[lane]),

1272 lane_len_in_hex,

1273 static_cast<uint64_t>(inputs_imm1[input_index_imm1]),

1274 lane_len_in_hex, static_cast<uint64_t>(inputs_n[input_index_n]),

1275 lane_len_in_hex,

1276 static_cast<uint64_t>(inputs_imm2[input_index_imm2]),

1277 lane_len_in_hex, static_cast<uint64_t>(results[output_index]),

1278 lane_len_in_hex, static_cast<uint64_t>(expected[output_index]));

1279 }

1280 }

1281 }

1282 }

1283 }

1284 DCHECK_EQ(counted_length, expected_length);

1285 if (error_count > kErrorReportLimit) {

1286 printf("%u other errors follow.\n", error_count - kErrorReportLimit);

1287 }

1288 CHECK(error_count == 0);

1289 }

1290

1291 } // anonymous namespace

1292

1293 // ==== NEON Tests. ====

1294

1295 // clang-format off

1296

// Calls Test1OpNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>" and checked against kExpected_NEON_<insn>_<vd_fmt>
// (kExpectedCount_NEON_<insn>_<vd_fmt> entries). 'src_n' must be an array, as
// its element count is taken with sizeof.
#define CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, src_n) \
  Test1OpNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
      &MacroAssembler::insn, \
      src_n, (sizeof(src_n) / sizeof(src_n[0])), \
      kExpected_NEON_##insn##_##vd_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt, \
      kFormat##vd_fmt, kFormat##vn_fmt)

1304

// Calls Test1OpAcrossNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>_<vn_fmt>" and checked against
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt>. 'src_n' must be an array, as its
// element count is taken with sizeof.
#define CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, src_n) \
  Test1OpAcrossNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt), \
      &MacroAssembler::insn, \
      src_n, (sizeof(src_n) / sizeof(src_n[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt, \
      kFormat##vd_fmt, kFormat##vn_fmt)

1313

// Calls Test2OpNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>" and checked against kExpected_NEON_<insn>_<vd_fmt>.
// 'src_n' and 'src_m' must be arrays, as their element counts are taken with
// sizeof; 'src_d' is passed through as the destination input.
#define CALL_TEST_NEON_HELPER_2Op(insn, vd_fmt, vn_fmt, vm_fmt, src_d, \
                                  src_n, src_m) \
  Test2OpNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt), \
      &MacroAssembler::insn, src_d, \
      src_n, (sizeof(src_n) / sizeof(src_n[0])), \
      src_m, (sizeof(src_m) / sizeof(src_m[0])), \
      kExpected_NEON_##insn##_##vd_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt, \
      kFormat##vd_fmt, kFormat##vn_fmt, kFormat##vm_fmt)

1323

// Calls Test2OpImmNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>_2OPIMM" and checked against
// kExpected_NEON_<insn>_<vd_fmt>_2OPIMM. 'src_n' (register inputs) and 'src_m'
// (immediates) must be arrays, as their element counts are taken with sizeof.
#define CALL_TEST_NEON_HELPER_2OpImm(insn, vd_fmt, vn_fmt, src_n, src_m) \
  Test2OpImmNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_2OPIMM", \
      &MacroAssembler::insn, \
      src_n, (sizeof(src_n) / sizeof(src_n[0])), \
      src_m, (sizeof(src_m) / sizeof(src_m[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_2OPIMM, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_2OPIMM, \
      kFormat##vd_fmt, kFormat##vn_fmt)

1333

// Calls TestByElementNEON for MacroAssembler::<insn>. The test is named
// "<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt>" and checked against
// kExpected_NEON_<insn>_<vd_fmt>_<vn_fmt>_<vm_fmt>. 'src_n', 'src_m' and
// 'idx' must be arrays, as their element counts are taken with sizeof.
#define CALL_TEST_NEON_HELPER_ByElement(insn, vd_fmt, vn_fmt, vm_fmt, \
                                        src_d, src_n, src_m, idx) \
  TestByElementNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt) "_" STRINGIFY(vn_fmt) \
      "_" STRINGIFY(vm_fmt), \
      &MacroAssembler::insn, src_d, \
      src_n, (sizeof(src_n) / sizeof(src_n[0])), \
      src_m, (sizeof(src_m) / sizeof(src_m[0])), \
      idx, (sizeof(idx) / sizeof(idx[0])), \
      kExpected_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt##_##vn_fmt##_##vm_fmt, \
      kFormat##vd_fmt, kFormat##vn_fmt, kFormat##vm_fmt)

1346

// Calls TestOpImmOpImmNEON with the code generator 'emit_fn'. The test is
// named "<insn>_<vd_fmt>" and checked against kExpected_NEON_<insn>_<vd_fmt>.
// 'imm1_set', 'src_n' and 'imm2_set' must be arrays, as their element counts
// are taken with sizeof.
#define CALL_TEST_NEON_HELPER_OpImmOpImm(emit_fn, insn, vd_fmt, vn_fmt, \
                                         src_d, imm1_set, src_n, \
                                         imm2_set) \
  TestOpImmOpImmNEON( \
      STRINGIFY(insn) "_" STRINGIFY(vd_fmt), emit_fn, src_d, \
      imm1_set, (sizeof(imm1_set) / sizeof(imm1_set[0])), \
      src_n, (sizeof(src_n) / sizeof(src_n[0])), \
      imm2_set, (sizeof(imm2_set) / sizeof(imm2_set[0])), \
      kExpected_NEON_##insn##_##vd_fmt, \
      kExpectedCount_NEON_##insn##_##vd_fmt, \
      kFormat##vd_fmt, kFormat##vn_fmt)

1358

// One-operand test whose destination and source use the same format 'fmt'.
#define CALL_TEST_NEON_HELPER_2SAME(insn, fmt, src) \
  CALL_TEST_NEON_HELPER_1Op(insn, fmt, fmt, src)

// Defines SIMTESTs <insn>_8B and <insn>_16B, fed from kInput8bits<set>.
#define DEFINE_TEST_NEON_2SAME_8B_16B(insn, set) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8B, kInput8bits##set); \
  } \
  SIMTEST(insn##_16B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 16B, kInput8bits##set); \
  }

// Defines SIMTESTs <insn>_4H and <insn>_8H, fed from kInput16bits<set>.
#define DEFINE_TEST_NEON_2SAME_4H_8H(insn, set) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4H, kInput16bits##set); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 8H, kInput16bits##set); \
  }

// Defines SIMTESTs <insn>_2S and <insn>_4S, fed from kInput32bits<set>.
#define DEFINE_TEST_NEON_2SAME_2S_4S(insn, set) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInput32bits##set); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInput32bits##set); \
  }

1385

// 8B, 16B, 4H and 8H variants.
#define DEFINE_TEST_NEON_2SAME_BH(insn, set) \
  DEFINE_TEST_NEON_2SAME_8B_16B(insn, set) \
  DEFINE_TEST_NEON_2SAME_4H_8H(insn, set)

// Every vector variant except 2D.
#define DEFINE_TEST_NEON_2SAME_NO2D(insn, set) \
  DEFINE_TEST_NEON_2SAME_BH(insn, set) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, set)

// Every vector variant; <insn>_2D is fed from kInput64bits<set>.
#define DEFINE_TEST_NEON_2SAME(insn, set) \
  DEFINE_TEST_NEON_2SAME_NO2D(insn, set) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##set); \
  }

// 2S, 4S and 2D variants only.
#define DEFINE_TEST_NEON_2SAME_SD(insn, set) \
  DEFINE_TEST_NEON_2SAME_2S_4S(insn, set) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInput64bits##set); \
  }

1404

// Floating-point vector variants: <insn>_2S and <insn>_4S are fed from
// kInputFloat<set>, <insn>_2D from kInputDouble<set>.
#define DEFINE_TEST_NEON_2SAME_FP(insn, set) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2S, kInputFloat##set); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 4S, kInputFloat##set); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, 2D, kInputDouble##set); \
  }

1415

// Floating-point scalar variants: <insn>_S is fed from kInputFloat<set>,
// <insn>_D from kInputDouble<set>.
#define DEFINE_TEST_NEON_2SAME_FP_SCALAR(insn, set) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInputFloat##set); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInputDouble##set); \
  }

// Integer scalar variants, one SIMTEST per lane size; each is fed from the
// kInput<width>bits<set> array of the matching width.
#define DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, set) \
  SIMTEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, B, kInput8bits##set); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, set) \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, H, kInput16bits##set); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, set) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, S, kInput32bits##set); \
  }
#define DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, set) \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2SAME(insn, D, kInput64bits##set); \
  }

// All four integer scalar variants.
#define DEFINE_TEST_NEON_2SAME_SCALAR(insn, set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_B(insn, set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_H(insn, set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, set)

// S and D integer scalar variants only.
#define DEFINE_TEST_NEON_2SAME_SCALAR_SD(insn, set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_S(insn, set) \
  DEFINE_TEST_NEON_2SAME_SCALAR_D(insn, set)

1450

// Across-lanes test: thin alias for CALL_TEST_NEON_HELPER_1OpAcross.
#define CALL_TEST_NEON_HELPER_ACROSS(insn, vd_fmt, vn_fmt, src_n) \
  CALL_TEST_NEON_HELPER_1OpAcross(insn, vd_fmt, vn_fmt, src_n)

// Across-lanes tests whose scalar result has the same lane size as the source
// vector. Tests are named <insn>_<vd>_<vn>.
#define DEFINE_TEST_NEON_ACROSS(insn, set) \
  SIMTEST(insn##_B_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 8B, kInput8bits##set); \
  } \
  SIMTEST(insn##_B_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, B, 16B, kInput8bits##set); \
  } \
  SIMTEST(insn##_H_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 4H, kInput16bits##set); \
  } \
  SIMTEST(insn##_H_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8H, kInput16bits##set); \
  } \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInput32bits##set); \
  }

// Across-lanes tests whose scalar result is one lane size wider than the
// source vector's lanes.
#define DEFINE_TEST_NEON_ACROSS_LONG(insn, set) \
  SIMTEST(insn##_H_8B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 8B, kInput8bits##set); \
  } \
  SIMTEST(insn##_H_16B) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, H, 16B, kInput8bits##set); \
  } \
  SIMTEST(insn##_S_4H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4H, kInput16bits##set); \
  } \
  SIMTEST(insn##_S_8H) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 8H, kInput16bits##set); \
  } \
  SIMTEST(insn##_D_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, D, 4S, kInput32bits##set); \
  }

// Floating-point across-lanes test (S result from a 4S source), fed from
// kInputFloat<set>.
#define DEFINE_TEST_NEON_ACROSS_FP(insn, set) \
  SIMTEST(insn##_S_4S) { \
    CALL_TEST_NEON_HELPER_ACROSS(insn, S, 4S, kInputFloat##set); \
  }

1492

// One-operand test whose destination and source formats differ: thin alias
// for CALL_TEST_NEON_HELPER_1Op.
#define CALL_TEST_NEON_HELPER_2DIFF(insn, vd_fmt, vn_fmt, src_n) \
  CALL_TEST_NEON_HELPER_1Op(insn, vd_fmt, vn_fmt, src_n)

// Tests named after the destination format, each paired with a source format
// of narrower lanes.
#define DEFINE_TEST_NEON_2DIFF_LONG(insn, set) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 8B, kInput8bits##set); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8H, 16B, kInput8bits##set); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 4H, kInput16bits##set); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 8H, kInput16bits##set); \
  } \
  SIMTEST(insn##_1D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 1D, 2S, kInput32bits##set); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 4S, kInput32bits##set); \
  }

// Narrowing tests: the first three exercise <insn>, the last three exercise
// the second-half form <insn>2.
#define DEFINE_TEST_NEON_2DIFF_NARROW(insn, set) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 8B, 8H, kInput16bits##set); \
  } \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInput32bits##set); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInput64bits##set); \
  } \
  SIMTEST(insn##2_16B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 16B, 8H, kInput16bits##set); \
  } \
  SIMTEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInput32bits##set); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInput64bits##set); \
  }

1535

// Floating-point lengthening tests for <insn> and <insn>2, fed from
// kInputFloat16<set> (H sources) or kInputFloat<set> (S sources).
#define DEFINE_TEST_NEON_2DIFF_FP_LONG(insn, set) \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4S, 4H, kInputFloat16##set); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2D, 2S, kInputFloat##set); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 8H, kInputFloat16##set); \
  } \
  SIMTEST(insn##2_2D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 2D, 4S, kInputFloat##set); \
  }

// Floating-point narrowing tests for <insn> and <insn>2, fed from
// kInputFloat<set> (S sources) or kInputDouble<set> (D sources).
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW(insn, set) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 4H, 4S, kInputFloat##set); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInputDouble##set); \
  } \
  SIMTEST(insn##2_8H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 8H, 4S, kInputFloat##set); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInputDouble##set); \
  }

// Floating-point narrowing tests restricted to the 2D-source variants.
#define DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(insn, set) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, 2S, 2D, kInputDouble##set); \
  } \
  SIMTEST(insn##2_4S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn##2, 4S, 2D, kInputDouble##set); \
  }

1571

// Scalar narrowing tests: each destination is one lane size narrower than
// its source.
#define DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(insn, set) \
  SIMTEST(insn##_B) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, B, H, kInput16bits##set); \
  } \
  SIMTEST(insn##_H) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, H, S, kInput32bits##set); \
  } \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, S, D, kInput64bits##set); \
  }

// Floating-point tests with a scalar destination and a two-lane vector source
// (S from 2S, D from 2D).
#define DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(insn, set) \
  SIMTEST(insn##_S) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, S, 2S, kInputFloat##set); \
  } \
  SIMTEST(insn##_D) { \
    CALL_TEST_NEON_HELPER_2DIFF(insn, D, 2D, kInputDouble##set); \
  }

1590

// Two-operand test in which Vd, Vn and Vm share the format 'fmt' and Vn and
// Vm are both fed from 'src_nm'. Expands to a braced statement.
#define CALL_TEST_NEON_HELPER_3SAME(insn, fmt, src_d, src_nm) \
  { \
    CALL_TEST_NEON_HELPER_2Op(insn, fmt, fmt, fmt, src_d, src_nm, src_nm); \
  }

// <insn>_8B and <insn>_16B; destination input is kInput8bitsAccDestination.
#define DEFINE_TEST_NEON_3SAME_8B_16B(insn, set) \
  SIMTEST(insn##_8B) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 8B, kInput8bitsAccDestination, \
                                kInput8bits##set); \
  } \
  SIMTEST(insn##_16B) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 16B, kInput8bitsAccDestination, \
                                kInput8bits##set); \
  }

// <insn>_4H, _8H, _2S and _4S; destination inputs are the AccDestination
// arrays of the matching width.
#define DEFINE_TEST_NEON_3SAME_HS(insn, set) \
  SIMTEST(insn##_4H) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 4H, kInput16bitsAccDestination, \
                                kInput16bits##set); \
  } \
  SIMTEST(insn##_8H) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 8H, kInput16bitsAccDestination, \
                                kInput16bits##set); \
  } \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2S, kInput32bitsAccDestination, \
                                kInput32bits##set); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 4S, kInput32bitsAccDestination, \
                                kInput32bits##set); \
  }

// Every vector variant except 2D.
#define DEFINE_TEST_NEON_3SAME_NO2D(insn, set) \
  DEFINE_TEST_NEON_3SAME_8B_16B(insn, set) \
  DEFINE_TEST_NEON_3SAME_HS(insn, set)

// Every vector variant, including <insn>_2D.
#define DEFINE_TEST_NEON_3SAME(insn, set) \
  DEFINE_TEST_NEON_3SAME_NO2D(insn, set) \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2D, kInput64bitsAccDestination, \
                                kInput64bits##set); \
  }

1635

// Floating-point vector variants: 2S and 4S use the Float arrays, 2D uses the
// Double arrays, each with the matching AccDestination destination input.
#define DEFINE_TEST_NEON_3SAME_FP(insn, set) \
  SIMTEST(insn##_2S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2S, kInputFloatAccDestination, \
                                kInputFloat##set); \
  } \
  SIMTEST(insn##_4S) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 4S, kInputFloatAccDestination, \
                                kInputFloat##set); \
  } \
  SIMTEST(insn##_2D) { \
    CALL_TEST_NEON_HELPER_3SAME(insn, 2D, kInputDoubleAccDestination, \
                                kInputDouble##set); \
  }

1649

1650 #define DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input) \

1651 SIMTEST(mnemonic##_D) { \

1652 CALL_TEST_NEON_HELPER_3SAME(mnemonic, D, kInput64bitsAccDestination, \

1653 kInput64bits##input); \

1654 }

1655

1656 #define DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \

1657 SIMTEST(mnemonic##_H) { \

1658 CALL_TEST_NEON_HELPER_3SAME(mnemonic, H, kInput16bitsAccDestination, \

1659 kInput16bits##input); \

1660 } \

1661 SIMTEST(mnemonic##_S) { \

1662 CALL_TEST_NEON_HELPER_3SAME(mnemonic, S, kInput32bitsAccDestination, \

1663 kInput32bits##input); \

1664 }

1665

// Scalar three-same tests for all four forms, defined in the order
// mnemonic_B, mnemonic_H, mnemonic_S, mnemonic_D. The B test is spelled out
// here; the H/S and D tests come from the narrower sibling macros, which
// expand to exactly the same test bodies.
#define DEFINE_TEST_NEON_3SAME_SCALAR(mnemonic, input) \
  SIMTEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, B, kInput8bitsAccDestination, kInput8bits##input); \
  } \
  DEFINE_TEST_NEON_3SAME_SCALAR_HS(mnemonic, input) \
  DEFINE_TEST_NEON_3SAME_SCALAR_D(mnemonic, input)

1683

// Scalar floating-point three-same tests: mnemonic_S uses the float
// destination/input lists and mnemonic_D uses the double ones.
#define DEFINE_TEST_NEON_3SAME_FP_SCALAR(mnemonic, input) \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, S, kInputFloatAccDestination, kInputFloat##input); \
  } \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3SAME( \
        mnemonic, D, kInputDoubleAccDestination, kInputDouble##input); \
  }

1693

// Three-different helper call: forwards all seven arguments unchanged to
// CALL_TEST_NEON_HELPER_2Op inside its own brace-delimited block.
#define CALL_TEST_NEON_HELPER_3DIFF( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m) \
  { \
    CALL_TEST_NEON_HELPER_2Op( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m); \
  }

1700

// Long three-different tests with an 8H destination. Defines mnemonic_8H
// (8B sources) and mnemonic2_8H (16B sources, using the `mnemonic##2`
// instruction); both use the 16-bit destination list and the 8-bit `input`
// list for each source.
#define DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  SIMTEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8B, 8B, kInput16bitsAccDestination, \
        kInput8bits##input, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 16B, 16B, kInput16bitsAccDestination, \
        kInput8bits##input, kInput8bits##input); \
  }

1712

// Long three-different tests with a 4S destination. Defines mnemonic_4S
// (4H sources) and mnemonic2_4S (8H sources, using `mnemonic##2`); both use
// the 32-bit destination list and the 16-bit `input` list for each source.
#define DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4H, 4H, kInput32bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 8H, 8H, kInput32bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  }

1724

// Long three-different tests with a 2D destination. Defines mnemonic_2D
// (2S sources) and mnemonic2_2D (4S sources, using `mnemonic##2`); both use
// the 64-bit destination list and the 32-bit `input` list for each source.
#define DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input) \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2S, 2S, kInput64bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 4S, 4S, kInput64bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  }

1736

// Long three-different tests restricted to the 4S and 2D destinations.
#define DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_4S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_2D(mnemonic, input)

1740

// Long three-different tests for all destinations, in the order 8H, 4S, 2D.
// The 4S and 2D pair is taken from DEFINE_TEST_NEON_3DIFF_LONG_SD, which
// expands to those two macros in that order.
#define DEFINE_TEST_NEON_3DIFF_LONG(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_8H(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_LONG_SD(mnemonic, input)

1745

// Scalar long three-different test with an S destination and H sources:
// defines mnemonic_S using the 32-bit destination list and the 16-bit `input`
// list for both sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, S, H, H, kInput32bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  }

1751

// Scalar long three-different test with a D destination and S sources:
// defines mnemonic_D using the 64-bit destination list and the 32-bit `input`
// list for both sources.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input) \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, D, S, S, kInput64bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  }

1757

// Scalar long three-different tests for both the S and the D destination.
#define DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_S(mnemonic, input) \
  DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_D(mnemonic, input)

1761

// Wide three-different tests: the destination and first source share a
// format while the second source uses a narrower one. Defines mnemonic_8H,
// mnemonic_4S and mnemonic_2D, then the `mnemonic##2` counterparts
// mnemonic2_8H, mnemonic2_4S and mnemonic2_2D, which differ only in the
// format of the second source (16B/8H/4S instead of 8B/4H/2S).
#define DEFINE_TEST_NEON_3DIFF_WIDE(mnemonic, input) \
  SIMTEST(mnemonic##_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8H, 8H, 8B, kInput16bitsAccDestination, \
        kInput16bits##input, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4S, 4S, 4H, kInput32bitsAccDestination, \
        kInput32bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2D, 2D, 2S, kInput64bitsAccDestination, \
        kInput64bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 8H, 16B, kInput16bitsAccDestination, \
        kInput16bits##input, kInput8bits##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 4S, 8H, kInput32bitsAccDestination, \
        kInput32bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##2_2D) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 2D, 2D, 4S, kInput64bitsAccDestination, \
        kInput64bits##input, kInput32bits##input); \
  }

1793

// Narrowing three-different tests: both sources use a wider format than the
// destination. Defines mnemonic_8B, mnemonic_4H and mnemonic_2S, then the
// `mnemonic##2` counterparts mnemonic2_16B, mnemonic2_8H and mnemonic2_4S,
// which keep the same source formats but name a full-width destination.
#define DEFINE_TEST_NEON_3DIFF_NARROW(mnemonic, input) \
  SIMTEST(mnemonic##_8B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 8B, 8H, 8H, kInput8bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##_4H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 4H, 4S, 4S, kInput16bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##_2S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic, 2S, 2D, 2D, kInput32bitsAccDestination, \
        kInput64bits##input, kInput64bits##input); \
  } \
  SIMTEST(mnemonic##2_16B) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 16B, 8H, 8H, kInput8bitsAccDestination, \
        kInput16bits##input, kInput16bits##input); \
  } \
  SIMTEST(mnemonic##2_8H) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 8H, 4S, 4S, kInput16bitsAccDestination, \
        kInput32bits##input, kInput32bits##input); \
  } \
  SIMTEST(mnemonic##2_4S) { \
    CALL_TEST_NEON_HELPER_3DIFF( \
        mnemonic##2, 4S, 2D, 2D, kInput32bitsAccDestination, \
        kInput64bits##input, kInput64bits##input); \
  }

1825

// Two-operand-plus-immediate helper call: forwards its five arguments
// unchanged to CALL_TEST_NEON_HELPER_2OpImm inside its own brace-delimited
// block.
#define CALL_TEST_NEON_HELPER_2OPIMM( \
    mnemonic, vdform, vnform, input_n, input_imm) \
  { \
    CALL_TEST_NEON_HELPER_2OpImm( \
        mnemonic, vdform, vnform, input_n, input_imm); \
  }

1832

// Vector operand-plus-immediate tests with matching destination and source
// formats. Defines mnemonic_<fmt>_2OPIMM for 8B, 16B, 4H, 8H, 2S, 4S and 2D
// (in that order), each pairing the `input` list and the Imm<input_imm> list
// of the matching lane width. The 2S/4S/2D tests come from
// DEFINE_TEST_NEON_2OPIMM_SD, which defines exactly those three tests.
#define DEFINE_TEST_NEON_2OPIMM(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, 16B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm)

1862

// Copy-style operand-plus-immediate tests: the destination is a vector format
// and the source is named by its scalar lane (B, H, S or D). Defines
// mnemonic_<fmt>_2OPIMM for 8B, 16B, 4H, 8H, 2S, 4S and 2D, each pairing the
// `input` list and the Imm<input_imm> list of the matching lane width.
#define DEFINE_TEST_NEON_2OPIMM_COPY(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 16B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }

1892

// Narrowing operand-plus-immediate tests. The source list matches the (wider)
// source format while the immediate list matches the (narrower) destination
// lane width. Defines mnemonic_8B/_4H/_2S_2OPIMM and the `mnemonic##2`
// counterparts mnemonic2_16B/_8H/_4S_2OPIMM.
#define DEFINE_TEST_NEON_2OPIMM_NARROW(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8B, 8H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4H, 4S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2D, kInput64bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_16B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 16B, 8H, kInput16bits##input, \
        kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 4S, kInput32bits##input, \
        kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 2D, kInput64bits##input, \
        kInput32bitsImm##input_imm); \
  }

1918

// Scalar narrowing operand-plus-immediate tests: defines mnemonic_B_2OPIMM
// (H source), mnemonic_H_2OPIMM (S source) and mnemonic_S_2OPIMM (D source).
// The source list matches the source width; the immediate list matches the
// destination width.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, H, kInput16bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, S, kInput32bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, D, kInput64bits##input, kInput32bitsImm##input_imm); \
  }

1932

// Vector floating-point compare-with-immediate tests: defines
// mnemonic_2S_2OPIMM, mnemonic_4S_2OPIMM and mnemonic_2D_2OPIMM, all taking
// their immediate from kInputDoubleImm<input_imm>.
//
// The 2S test reads kInputFloatBasic regardless of `input`. This was
// previously spelled as the paste kInputFloat##Basic, which always yields that
// same name; it is written out here so the behaviour is visible. The 4S and
// 2D tests honour `input`.
// NOTE(review): ignoring `input` for 2S looks unintended, but switching it to
// kInputFloat##input changes the test inputs for any caller that passes
// something other than Basic, and presumably the expected results would have
// to be regenerated -- confirm before changing.
//
// Every helper call is terminated with `;` to match the sibling macros.
#define DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInputDoubleImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }

1946

// Vector floating-point operand-plus-immediate tests: defines
// mnemonic_2S_2OPIMM and mnemonic_4S_2OPIMM (immediates from
// kInput32bitsImm<input_imm>) and mnemonic_2D_2OPIMM (immediates from
// kInput64bitsImm<input_imm>).
//
// The 2S test reads kInputFloatBasic regardless of `input`. This was
// previously spelled as the paste kInputFloat##Basic, which always yields that
// same name; it is written out here so the behaviour is visible. The 4S and
// 2D tests honour `input`.
// NOTE(review): ignoring `input` for 2S looks unintended, but switching it to
// kInputFloat##input changes the test inputs for callers that pass something
// other than Basic, and presumably the expected results would have to be
// regenerated -- confirm before changing.
//
// Every helper call is terminated with `;` to match the sibling macros.
#define DEFINE_TEST_NEON_2OPIMM_FP(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInputFloatBasic, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInputFloat##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInputDouble##input, kInput64bitsImm##input_imm); \
  }

1960

// Scalar floating-point operand-plus-immediate tests: defines
// mnemonic_S_2OPIMM (immediates from kInput32bitsImm<input_imm>) and
// mnemonic_D_2OPIMM (immediates from kInput64bitsImm<input_imm>).
//
// The S test reads kInputFloatBasic regardless of `input`. This was
// previously spelled as the paste kInputFloat##Basic, which always yields that
// same name; it is written out here so the behaviour is visible. The D test
// honours `input`.
// NOTE(review): ignoring `input` for S looks unintended, but switching it to
// kInputFloat##input changes the test inputs for callers that pass something
// other than Basic, and presumably the expected results would have to be
// regenerated -- confirm before changing.
//
// Every helper call is terminated with `;` to match the sibling macros.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloatBasic, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInput64bitsImm##input_imm); \
  }

1970

// Vector operand-plus-immediate tests limited to the 2S, 4S and 2D formats,
// using the integer `input` and Imm<input_imm> lists of the matching width.
#define DEFINE_TEST_NEON_2OPIMM_SD(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_2S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2S, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }

1984

// Scalar operand-plus-immediate test for the D form only: defines
// mnemonic_D_2OPIMM with the 64-bit `input` and Imm<input_imm> lists.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInput64bits##input, kInput64bitsImm##input_imm); \
  }

1990

// Scalar operand-plus-immediate tests for the S and D forms: defines
// mnemonic_S_2OPIMM here and takes mnemonic_D_2OPIMM from
// DEFINE_TEST_NEON_2OPIMM_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_D(mnemonic, input, input_imm)

1997

// Scalar floating-point operand-plus-immediate test for the D form: defines
// mnemonic_D_2OPIMM with the kInputDouble<input> list and immediates from
// kInputDoubleImm<input_imm>.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, D, D, kInputDouble##input, kInputDoubleImm##input_imm); \
  }

2003

// Scalar floating-point operand-plus-immediate tests for the S and D forms.
// mnemonic_S_2OPIMM uses the kInputFloat<input> list with immediates from
// kInputDoubleImm<input_imm>; mnemonic_D_2OPIMM comes from
// DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D.
#define DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, S, S, kInputFloat##input, kInputDoubleImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_D(mnemonic, input, input_imm)

2010

// Scalar operand-plus-immediate tests for all four forms: defines
// mnemonic_B_2OPIMM and mnemonic_H_2OPIMM here, then the S and D tests via
// DEFINE_TEST_NEON_2OPIMM_SCALAR_SD.
#define DEFINE_TEST_NEON_2OPIMM_SCALAR(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_B_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, B, B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, H, H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(mnemonic, input, input_imm)

2021

// Lengthening operand-plus-immediate tests. Both the source list and the
// immediate list match the (narrower) source lane width. Defines
// mnemonic_8H/_4S/_2D_2OPIMM and the `mnemonic##2` counterparts
// mnemonic2_8H/_4S/_2D_2OPIMM, which name full-width source formats.
#define DEFINE_TEST_NEON_2OPIMM_LONG(mnemonic, input, input_imm) \
  SIMTEST(mnemonic##_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 8H, 8B, kInput8bits##input, kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 4S, 4H, kInput16bits##input, kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic, 2D, 2S, kInput32bits##input, kInput32bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_8H_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 8H, 16B, kInput8bits##input, \
        kInput8bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_4S_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 4S, 8H, kInput16bits##input, \
        kInput16bitsImm##input_imm); \
  } \
  SIMTEST(mnemonic##2_2D_2OPIMM) { \
    CALL_TEST_NEON_HELPER_2OPIMM( \
        mnemonic##2, 2D, 4S, kInput32bits##input, \
        kInput32bitsImm##input_imm); \
  }

2047

// By-element helper call: forwards its eight arguments unchanged to
// CALL_TEST_NEON_HELPER_ByElement inside its own brace-delimited block.
#define CALL_TEST_NEON_HELPER_BYELEMENT( \
    mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, indices) \
  { \
    CALL_TEST_NEON_HELPER_ByElement( \
        mnemonic, vdform, vnform, vmform, input_d, input_n, input_m, \
        indices); \
  }

2054

// Vector by-element tests with same-width destination and sources. Defines
// mnemonic_4H_4H_H and mnemonic_8H_8H_H (16-bit lists, kInputHIndices) and
// mnemonic_2S_2S_S and mnemonic_4S_4S_S (32-bit lists, kInputSIndices).
// input_d, input_n and input_m select the destination, first-source and
// second-source lists respectively.
#define DEFINE_TEST_NEON_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  SIMTEST(mnemonic##_4H_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4H, 4H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  SIMTEST(mnemonic##_8H_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 8H, 8H, H, \
                                    kInput16bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  SIMTEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2S, 2S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  } \
  SIMTEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4S, S, \
                                    kInput32bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  }

2076

// Scalar by-element tests: defines mnemonic_H_H_H (16-bit lists,
// kInputHIndices) and mnemonic_S_S_S (32-bit lists, kInputSIndices).
#define DEFINE_TEST_NEON_BYELEMENT_SCALAR(mnemonic, input_d, input_n, input_m) \
  SIMTEST(mnemonic##_H_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, H, H, H, kInput16bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  SIMTEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, S, S, kInput32bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  }

2088

// Vector floating-point by-element tests. Defines mnemonic_2S_2S_S and
// mnemonic_4S_4S_S (float lists, kInputSIndices) and mnemonic_2D_2D_D (double
// lists, kInputDIndices).
#define DEFINE_TEST_NEON_FP_BYELEMENT(mnemonic, input_d, input_n, input_m) \
  SIMTEST(mnemonic##_2S_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 2S, 2S, S, kInputFloat##input_d, kInputFloat##input_n, \
        kInputFloat##input_m, kInputSIndices); \
  } \
  SIMTEST(mnemonic##_4S_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, 4S, 4S, S, kInputFloat##input_d, kInputFloat##input_n, \
        kInputFloat##input_m, kInputSIndices); \
  } \
  SIMTEST(mnemonic##_2D_2D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2D, 2D, D, \
                                    kInputDouble##input_d, \
                                    kInputDouble##input_n, \
                                    kInputDouble##input_m, kInputDIndices); \
  }

2105

// Scalar floating-point by-element tests: defines mnemonic_S_S_S (float
// lists, kInputSIndices) and mnemonic_D_D_D (double lists, kInputDIndices).
#define DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(mnemonic, inp_d, inp_n, inp_m) \
  SIMTEST(mnemonic##_S_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, S, S, kInputFloat##inp_d, kInputFloat##inp_n, \
        kInputFloat##inp_m, kInputSIndices); \
  } \
  SIMTEST(mnemonic##_D_D_D) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, D, D, D, kInputDouble##inp_d, kInputDouble##inp_n, \
        kInputDouble##inp_m, kInputDIndices); \
  }

2117

// Vector by-element tests whose destination is wider than the sources.
// Defines mnemonic_4S_4H_H and mnemonic2_4S_8H_H (32-bit destination list,
// 16-bit source lists, kInputHIndices), then mnemonic_2D_2S_S and
// mnemonic2_2D_4S_S (64-bit destination list, 32-bit source lists,
// kInputSIndices). The "2" tests use the `mnemonic##2` instruction.
#define DEFINE_TEST_NEON_BYELEMENT_DIFF(mnemonic, input_d, input_n, input_m) \
  SIMTEST(mnemonic##_4S_4H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 4S, 4H, H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  SIMTEST(mnemonic##2_4S_8H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, 4S, 8H, H, \
                                    kInput32bits##input_d, \
                                    kInput16bits##input_n, \
                                    kInput16bits##input_m, kInputHIndices); \
  } \
  SIMTEST(mnemonic##_2D_2S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic, 2D, 2S, S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  } \
  SIMTEST(mnemonic##2_2D_4S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT(mnemonic##2, 2D, 4S, S, \
                                    kInput64bits##input_d, \
                                    kInput32bits##input_n, \
                                    kInput32bits##input_m, kInputSIndices); \
  }

2139

// Scalar by-element tests whose destination is wider than the sources:
// defines mnemonic_S_H_H (32-bit destination list, 16-bit source lists,
// kInputHIndices) and mnemonic_D_S_S (64-bit destination list, 32-bit source
// lists, kInputSIndices).
#define DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR( \
    mnemonic, input_d, input_n, input_m) \
  SIMTEST(mnemonic##_S_H_H) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, S, H, H, kInput32bits##input_d, kInput16bits##input_n, \
        kInput16bits##input_m, kInputHIndices); \
  } \
  SIMTEST(mnemonic##_D_S_S) { \
    CALL_TEST_NEON_HELPER_BYELEMENT( \
        mnemonic, D, S, S, kInput64bits##input_d, kInput32bits##input_n, \
        kInput32bits##input_m, kInputSIndices); \
  }

2152

// Two-operand/two-immediate helper call. Passes the MacroAssembler member
// named by `mnemonic` (as a pointer-to-member) and the mnemonic itself to
// CALL_TEST_NEON_HELPER_OpImmOpImm, using `variant` for both of the helper's
// format arguments, followed by the four input lists.
#define CALL_TEST_NEON_HELPER_2OP2IMM( \
    mnemonic, variant, input_d, input_imm1, input_n, input_imm2) \
  { \
    CALL_TEST_NEON_HELPER_OpImmOpImm( \
        &MacroAssembler::mnemonic, mnemonic, variant, variant, input_d, \
        input_imm1, input_n, input_imm2); \
  }

2160

// Two-operand/two-immediate tests, one per lane size. Defines mnemonic_B
// (16B), mnemonic_H (8H), mnemonic_S (4S) and mnemonic_D (2D); each passes
// the destination list, first immediate list, source list and second
// immediate list of the matching lane width.
#define DEFINE_TEST_NEON_2OP2IMM( \
    mnemonic, input_d, input_imm1, input_n, input_imm2) \
  SIMTEST(mnemonic##_B) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 16B, \
                                  kInput8bits##input_d, \
                                  kInput8bitsImm##input_imm1, \
                                  kInput8bits##input_n, \
                                  kInput8bitsImm##input_imm2); \
  } \
  SIMTEST(mnemonic##_H) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 8H, \
                                  kInput16bits##input_d, \
                                  kInput16bitsImm##input_imm1, \
                                  kInput16bits##input_n, \
                                  kInput16bitsImm##input_imm2); \
  } \
  SIMTEST(mnemonic##_S) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 4S, \
                                  kInput32bits##input_d, \
                                  kInput32bitsImm##input_imm1, \
                                  kInput32bits##input_n, \
                                  kInput32bitsImm##input_imm2); \
  } \
  SIMTEST(mnemonic##_D) { \
    CALL_TEST_NEON_HELPER_2OP2IMM(mnemonic, 2D, \
                                  kInput64bits##input_d, \
                                  kInput64bitsImm##input_imm1, \
                                  kInput64bits##input_n, \
                                  kInput64bitsImm##input_imm2); \
  }

2183

2184 // clang-format on

2185

2186 // Advanced SIMD copy.

2187 DEFINE_TEST_NEON_2OP2IMM(ins, Basic, LaneCountFromZero, Basic,

2188 LaneCountFromZero)

2189 DEFINE_TEST_NEON_2OPIMM_COPY(dup, Basic, LaneCountFromZero)

2190

2191 // Advanced SIMD scalar copy.

2192 DEFINE_TEST_NEON_2OPIMM_SCALAR(dup, Basic, LaneCountFromZero)

2193

2194 // Advanced SIMD three same.

2195 DEFINE_TEST_NEON_3SAME_NO2D(shadd, Basic)

2196 DEFINE_TEST_NEON_3SAME(sqadd, Basic)

2197 DEFINE_TEST_NEON_3SAME_NO2D(srhadd, Basic)

2198 DEFINE_TEST_NEON_3SAME_NO2D(shsub, Basic)

2199 DEFINE_TEST_NEON_3SAME(sqsub, Basic)

2200 DEFINE_TEST_NEON_3SAME(cmgt, Basic)

2201 DEFINE_TEST_NEON_3SAME(cmge, Basic)

2202 DEFINE_TEST_NEON_3SAME(sshl, Basic)

2203 DEFINE_TEST_NEON_3SAME(sqshl, Basic)

2204 DEFINE_TEST_NEON_3SAME(srshl, Basic)

2205 DEFINE_TEST_NEON_3SAME(sqrshl, Basic)

2206 DEFINE_TEST_NEON_3SAME_NO2D(smax, Basic)

2207 DEFINE_TEST_NEON_3SAME_NO2D(smin, Basic)

2208 DEFINE_TEST_NEON_3SAME_NO2D(sabd, Basic)

2209 DEFINE_TEST_NEON_3SAME_NO2D(saba, Basic)

2210 DEFINE_TEST_NEON_3SAME(add, Basic)

2211 DEFINE_TEST_NEON_3SAME(cmtst, Basic)

2212 DEFINE_TEST_NEON_3SAME_NO2D(mla, Basic)

2213 DEFINE_TEST_NEON_3SAME_NO2D(mul, Basic)

2214 DEFINE_TEST_NEON_3SAME_NO2D(smaxp, Basic)

2215 DEFINE_TEST_NEON_3SAME_NO2D(sminp, Basic)

2216 DEFINE_TEST_NEON_3SAME_HS(sqdmulh, Basic)

2217 DEFINE_TEST_NEON_3SAME(addp, Basic)

2218 DEFINE_TEST_NEON_3SAME_FP(fmaxnm, Basic)

2219 DEFINE_TEST_NEON_3SAME_FP(fmla, Basic)

2220 DEFINE_TEST_NEON_3SAME_FP(fadd, Basic)

2221 DEFINE_TEST_NEON_3SAME_FP(fmulx, Basic)

2222 DEFINE_TEST_NEON_3SAME_FP(fcmeq, Basic)

2223 DEFINE_TEST_NEON_3SAME_FP(fmax, Basic)

2224 DEFINE_TEST_NEON_3SAME_FP(frecps, Basic)

2225 DEFINE_TEST_NEON_3SAME_8B_16B(and_, Basic)

2226 DEFINE_TEST_NEON_3SAME_8B_16B(bic, Basic)

2227 DEFINE_TEST_NEON_3SAME_FP(fminnm, Basic)

2228 DEFINE_TEST_NEON_3SAME_FP(fmls, Basic)

2229 DEFINE_TEST_NEON_3SAME_FP(fsub, Basic)

2230 DEFINE_TEST_NEON_3SAME_FP(fmin, Basic)

2231 DEFINE_TEST_NEON_3SAME_FP(frsqrts, Basic)

2232 DEFINE_TEST_NEON_3SAME_8B_16B(orr, Basic)

2233 DEFINE_TEST_NEON_3SAME_8B_16B(orn, Basic)

2234 DEFINE_TEST_NEON_3SAME_NO2D(uhadd, Basic)

2235 DEFINE_TEST_NEON_3SAME(uqadd, Basic)

2236 DEFINE_TEST_NEON_3SAME_NO2D(urhadd, Basic)

2237 DEFINE_TEST_NEON_3SAME_NO2D(uhsub, Basic)

2238 DEFINE_TEST_NEON_3SAME(uqsub, Basic)

2239 DEFINE_TEST_NEON_3SAME(cmhi, Basic)

2240 DEFINE_TEST_NEON_3SAME(cmhs, Basic)

2241 DEFINE_TEST_NEON_3SAME(ushl, Basic)

2242 DEFINE_TEST_NEON_3SAME(uqshl, Basic)

2243 DEFINE_TEST_NEON_3SAME(urshl, Basic)

2244 DEFINE_TEST_NEON_3SAME(uqrshl, Basic)

2245 DEFINE_TEST_NEON_3SAME_NO2D(umax, Basic)

2246 DEFINE_TEST_NEON_3SAME_NO2D(umin, Basic)

2247 DEFINE_TEST_NEON_3SAME_NO2D(uabd, Basic)

2248 DEFINE_TEST_NEON_3SAME_NO2D(uaba, Basic)

2249 DEFINE_TEST_NEON_3SAME(sub, Basic)

2250 DEFINE_TEST_NEON_3SAME(cmeq, Basic)

2251 DEFINE_TEST_NEON_3SAME_NO2D(mls, Basic)

2252 DEFINE_TEST_NEON_3SAME_8B_16B(pmul, Basic)

2253 DEFINE_TEST_NEON_3SAME_NO2D(uminp, Basic)

2254 DEFINE_TEST_NEON_3SAME_NO2D(umaxp, Basic)

2255 DEFINE_TEST_NEON_3SAME_HS(sqrdmulh, Basic)

2256 DEFINE_TEST_NEON_3SAME_FP(fmaxnmp, Basic)

2257 DEFINE_TEST_NEON_3SAME_FP(faddp, Basic)

2258 DEFINE_TEST_NEON_3SAME_FP(fmul, Basic)

2259 DEFINE_TEST_NEON_3SAME_FP(fcmge, Basic)

2260 DEFINE_TEST_NEON_3SAME_FP(facge, Basic)

2261 DEFINE_TEST_NEON_3SAME_FP(fmaxp, Basic)

2262 DEFINE_TEST_NEON_3SAME_FP(fdiv, Basic)

2263 DEFINE_TEST_NEON_3SAME_8B_16B(eor, Basic)

2264 DEFINE_TEST_NEON_3SAME_8B_16B(bsl, Basic)

2265 DEFINE_TEST_NEON_3SAME_FP(fminnmp, Basic)

2266 DEFINE_TEST_NEON_3SAME_FP(fabd, Basic)

2267 DEFINE_TEST_NEON_3SAME_FP(fcmgt, Basic)

2268 DEFINE_TEST_NEON_3SAME_FP(facgt, Basic)

2269 DEFINE_TEST_NEON_3SAME_FP(fminp, Basic)

2270 DEFINE_TEST_NEON_3SAME_8B_16B(bit, Basic)

2271 DEFINE_TEST_NEON_3SAME_8B_16B(bif, Basic)

2272

2273 // Advanced SIMD scalar three same.

2274 DEFINE_TEST_NEON_3SAME_SCALAR(sqadd, Basic)

2275 DEFINE_TEST_NEON_3SAME_SCALAR(sqsub, Basic)

2276 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmgt, Basic)

2277 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmge, Basic)

2278 DEFINE_TEST_NEON_3SAME_SCALAR_D(sshl, Basic)

2279 DEFINE_TEST_NEON_3SAME_SCALAR(sqshl, Basic)

2280 DEFINE_TEST_NEON_3SAME_SCALAR_D(srshl, Basic)

2281 DEFINE_TEST_NEON_3SAME_SCALAR(sqrshl, Basic)

2282 DEFINE_TEST_NEON_3SAME_SCALAR_D(add, Basic)

2283 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmtst, Basic)

2284 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqdmulh, Basic)

2285 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fmulx, Basic)

2286 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmeq, Basic)

2287 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frecps, Basic)

2288 DEFINE_TEST_NEON_3SAME_FP_SCALAR(frsqrts, Basic)

2289 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqadd, Basic)

2290 DEFINE_TEST_NEON_3SAME_SCALAR_D(uqsub, Basic)

2291 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhi, Basic)

2292 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmhs, Basic)

2293 DEFINE_TEST_NEON_3SAME_SCALAR_D(ushl, Basic)

2294 DEFINE_TEST_NEON_3SAME_SCALAR(uqshl, Basic)

2295 DEFINE_TEST_NEON_3SAME_SCALAR_D(urshl, Basic)

2296 DEFINE_TEST_NEON_3SAME_SCALAR(uqrshl, Basic)

2297 DEFINE_TEST_NEON_3SAME_SCALAR_D(sub, Basic)

2298 DEFINE_TEST_NEON_3SAME_SCALAR_D(cmeq, Basic)

2299 DEFINE_TEST_NEON_3SAME_SCALAR_HS(sqrdmulh, Basic)

2300 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmge, Basic)

2301 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facge, Basic)

2302 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fabd, Basic)

2303 DEFINE_TEST_NEON_3SAME_FP_SCALAR(fcmgt, Basic)

2304 DEFINE_TEST_NEON_3SAME_FP_SCALAR(facgt, Basic)

2305

2306 // Advanced SIMD three different.

2307 DEFINE_TEST_NEON_3DIFF_LONG(saddl, Basic)

2308 DEFINE_TEST_NEON_3DIFF_WIDE(saddw, Basic)

2309 DEFINE_TEST_NEON_3DIFF_LONG(ssubl, Basic)

2310 DEFINE_TEST_NEON_3DIFF_WIDE(ssubw, Basic)

2311 DEFINE_TEST_NEON_3DIFF_NARROW(addhn, Basic)

2312 DEFINE_TEST_NEON_3DIFF_LONG(sabal, Basic)

2313 DEFINE_TEST_NEON_3DIFF_NARROW(subhn, Basic)

2314 DEFINE_TEST_NEON_3DIFF_LONG(sabdl, Basic)

2315 DEFINE_TEST_NEON_3DIFF_LONG(smlal, Basic)

2316 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlal, Basic)

2317 DEFINE_TEST_NEON_3DIFF_LONG(smlsl, Basic)

2318 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmlsl, Basic)

2319 DEFINE_TEST_NEON_3DIFF_LONG(smull, Basic)

2320 DEFINE_TEST_NEON_3DIFF_LONG_SD(sqdmull, Basic)

2321 DEFINE_TEST_NEON_3DIFF_LONG_8H(pmull, Basic)

2322 DEFINE_TEST_NEON_3DIFF_LONG(uaddl, Basic)

2323 DEFINE_TEST_NEON_3DIFF_WIDE(uaddw, Basic)

2324 DEFINE_TEST_NEON_3DIFF_LONG(usubl, Basic)

2325 DEFINE_TEST_NEON_3DIFF_WIDE(usubw, Basic)

2326 DEFINE_TEST_NEON_3DIFF_NARROW(raddhn, Basic)

2327 DEFINE_TEST_NEON_3DIFF_LONG(uabal, Basic)

2328 DEFINE_TEST_NEON_3DIFF_NARROW(rsubhn, Basic)

2329 DEFINE_TEST_NEON_3DIFF_LONG(uabdl, Basic)

2330 DEFINE_TEST_NEON_3DIFF_LONG(umlal, Basic)

2331 DEFINE_TEST_NEON_3DIFF_LONG(umlsl, Basic)

2332 DEFINE_TEST_NEON_3DIFF_LONG(umull, Basic)

2333

2334 // Advanced SIMD scalar three different.

2335 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlal, Basic)

2336 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmlsl, Basic)

2337 DEFINE_TEST_NEON_3DIFF_SCALAR_LONG_SD(sqdmull, Basic)

2338

2339 // Advanced SIMD scalar pairwise.

2340 SIMTEST(addp_SCALAR) {

2341 CALL_TEST_NEON_HELPER_2DIFF(addp, D, 2D, kInput64bitsBasic);

2342 }

2343 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxnmp, Basic)

2344 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(faddp, Basic)

2345 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fmaxp, Basic)

2346 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminnmp, Basic)

2347 DEFINE_TEST_NEON_2DIFF_FP_SCALAR_SD(fminp, Basic)

2348

2349 // Advanced SIMD shift by immediate.

2350 DEFINE_TEST_NEON_2OPIMM(sshr, Basic, TypeWidth)

2351 DEFINE_TEST_NEON_2OPIMM(ssra, Basic, TypeWidth)

2352 DEFINE_TEST_NEON_2OPIMM(srshr, Basic, TypeWidth)

2353 DEFINE_TEST_NEON_2OPIMM(srsra, Basic, TypeWidth)

2354 DEFINE_TEST_NEON_2OPIMM(shl, Basic, TypeWidthFromZero)

2355 DEFINE_TEST_NEON_2OPIMM(sqshl, Basic, TypeWidthFromZero)

2356 DEFINE_TEST_NEON_2OPIMM_NARROW(shrn, Basic, TypeWidth)

2357 DEFINE_TEST_NEON_2OPIMM_NARROW(rshrn, Basic, TypeWidth)

2358 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrn, Basic, TypeWidth)

2359 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrn, Basic, TypeWidth)

2360 DEFINE_TEST_NEON_2OPIMM_LONG(sshll, Basic, TypeWidthFromZero)

2361 DEFINE_TEST_NEON_2OPIMM_SD(scvtf, FixedPointConversions,

2362 TypeWidthFromZeroToWidth)

2363 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzs, Conversions, TypeWidthFromZeroToWidth)

2364 DEFINE_TEST_NEON_2OPIMM(ushr, Basic, TypeWidth)

2365 DEFINE_TEST_NEON_2OPIMM(usra, Basic, TypeWidth)

2366 DEFINE_TEST_NEON_2OPIMM(urshr, Basic, TypeWidth)

2367 DEFINE_TEST_NEON_2OPIMM(ursra, Basic, TypeWidth)

2368 DEFINE_TEST_NEON_2OPIMM(sri, Basic, TypeWidth)

2369 DEFINE_TEST_NEON_2OPIMM(sli, Basic, TypeWidthFromZero)

2370 DEFINE_TEST_NEON_2OPIMM(sqshlu, Basic, TypeWidthFromZero)

2371 DEFINE_TEST_NEON_2OPIMM(uqshl, Basic, TypeWidthFromZero)

2372 DEFINE_TEST_NEON_2OPIMM_NARROW(sqshrun, Basic, TypeWidth)

2373 DEFINE_TEST_NEON_2OPIMM_NARROW(sqrshrun, Basic, TypeWidth)

2374 DEFINE_TEST_NEON_2OPIMM_NARROW(uqshrn, Basic, TypeWidth)

2375 DEFINE_TEST_NEON_2OPIMM_NARROW(uqrshrn, Basic, TypeWidth)

2376 DEFINE_TEST_NEON_2OPIMM_LONG(ushll, Basic, TypeWidthFromZero)

2377 DEFINE_TEST_NEON_2OPIMM_SD(ucvtf, FixedPointConversions,

2378 TypeWidthFromZeroToWidth)

2379 DEFINE_TEST_NEON_2OPIMM_FP(fcvtzu, Conversions, TypeWidthFromZeroToWidth)

2380

2381 // Advanced SIMD scalar shift by immediate..

2382 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sshr, Basic, TypeWidth)

2383 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ssra, Basic, TypeWidth)

2384 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srshr, Basic, TypeWidth)

2385 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(srsra, Basic, TypeWidth)

2386 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(shl, Basic, TypeWidthFromZero)

2387 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshl, Basic, TypeWidthFromZero)

2388 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrn, Basic, TypeWidth)

2389 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrn, Basic, TypeWidth)

2390 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(scvtf, FixedPointConversions,

2391 TypeWidthFromZeroToWidth)

2392 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzs, Conversions, TypeWidthFromZeroToWidth)

2393 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ushr, Basic, TypeWidth)

2394 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(usra, Basic, TypeWidth)

2395 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(urshr, Basic, TypeWidth)

2396 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(ursra, Basic, TypeWidth)

2397 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sri, Basic, TypeWidth)

2398 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(sli, Basic, TypeWidthFromZero)

2399 DEFINE_TEST_NEON_2OPIMM_SCALAR(sqshlu, Basic, TypeWidthFromZero)

2400 DEFINE_TEST_NEON_2OPIMM_SCALAR(uqshl, Basic, TypeWidthFromZero)

2401 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqshrun, Basic, TypeWidth)

2402 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(sqrshrun, Basic, TypeWidth)

2403 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqshrn, Basic, TypeWidth)

2404 DEFINE_TEST_NEON_2OPIMM_SCALAR_NARROW(uqrshrn, Basic, TypeWidth)

2405 DEFINE_TEST_NEON_2OPIMM_SCALAR_SD(ucvtf, FixedPointConversions,

2406 TypeWidthFromZeroToWidth)

2407 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR(fcvtzu, Conversions, TypeWidthFromZeroToWidth)

2408

2409 // Advanced SIMD two-register miscellaneous.
// The compare-with-zero forms (cmgt/cmeq/cmlt/cmge/cmle and their fcm*
// counterparts) are instantiated through the 2OPIMM / 2OPIMM_FCMP_ZERO macros
// with the Zero immediate set, and shll through 2OPIMM_LONG with the SHLL
// set. Every other entry in this list uses a 2SAME* or 2DIFF* macro.

2410 DEFINE_TEST_NEON_2SAME_NO2D(rev64, Basic)

2411 DEFINE_TEST_NEON_2SAME_8B_16B(rev16, Basic)

2412 DEFINE_TEST_NEON_2DIFF_LONG(saddlp, Basic)

2413 DEFINE_TEST_NEON_2SAME(suqadd, Basic)

2414 DEFINE_TEST_NEON_2SAME_NO2D(cls, Basic)

2415 DEFINE_TEST_NEON_2SAME_8B_16B(cnt, Basic)

2416 DEFINE_TEST_NEON_2DIFF_LONG(sadalp, Basic)

2417 DEFINE_TEST_NEON_2SAME(sqabs, Basic)

2418 DEFINE_TEST_NEON_2OPIMM(cmgt, Basic, Zero)

2419 DEFINE_TEST_NEON_2OPIMM(cmeq, Basic, Zero)

2420 DEFINE_TEST_NEON_2OPIMM(cmlt, Basic, Zero)

2421 DEFINE_TEST_NEON_2SAME(abs, Basic)

2422 DEFINE_TEST_NEON_2DIFF_NARROW(xtn, Basic)

2423 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtn, Basic)

2424 DEFINE_TEST_NEON_2DIFF_FP_NARROW(fcvtn, Conversions)

2425 DEFINE_TEST_NEON_2DIFF_FP_LONG(fcvtl, Conversions)

2426 DEFINE_TEST_NEON_2SAME_FP(frintn, Conversions)

2427 DEFINE_TEST_NEON_2SAME_FP(frintm, Conversions)

2428 DEFINE_TEST_NEON_2SAME_FP(fcvtns, Conversions)

2429 DEFINE_TEST_NEON_2SAME_FP(fcvtms, Conversions)

2430 DEFINE_TEST_NEON_2SAME_FP(fcvtas, Conversions)

2431 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.

2432 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmgt, Basic, Zero)

2433 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmeq, Basic, Zero)

2434 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmlt, Basic, Zero)

2435 DEFINE_TEST_NEON_2SAME_FP(fabs, Basic)

2436 DEFINE_TEST_NEON_2SAME_FP(frintp, Conversions)

2437 DEFINE_TEST_NEON_2SAME_FP(frintz, Conversions)

2438 DEFINE_TEST_NEON_2SAME_FP(fcvtps, Conversions)

2439 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.

2440 DEFINE_TEST_NEON_2SAME_2S_4S(urecpe, Basic)

2441 DEFINE_TEST_NEON_2SAME_FP(frecpe, Basic)

2442 DEFINE_TEST_NEON_2SAME_BH(rev32, Basic)

2443 DEFINE_TEST_NEON_2DIFF_LONG(uaddlp, Basic)

2444 DEFINE_TEST_NEON_2SAME(usqadd, Basic)

2445 DEFINE_TEST_NEON_2SAME_NO2D(clz, Basic)

2446 DEFINE_TEST_NEON_2DIFF_LONG(uadalp, Basic)

2447 DEFINE_TEST_NEON_2SAME(sqneg, Basic)

2448 DEFINE_TEST_NEON_2OPIMM(cmge, Basic, Zero)

2449 DEFINE_TEST_NEON_2OPIMM(cmle, Basic, Zero)

2450 DEFINE_TEST_NEON_2SAME(neg, Basic)

2451 DEFINE_TEST_NEON_2DIFF_NARROW(sqxtun, Basic)

2452 DEFINE_TEST_NEON_2OPIMM_LONG(shll, Basic, SHLL)

2453 DEFINE_TEST_NEON_2DIFF_NARROW(uqxtn, Basic)

2454 DEFINE_TEST_NEON_2DIFF_FP_NARROW_2S(fcvtxn, Conversions)

2455 DEFINE_TEST_NEON_2SAME_FP(frinta, Conversions)

2456 DEFINE_TEST_NEON_2SAME_FP(frintx, Conversions)

2457 DEFINE_TEST_NEON_2SAME_FP(fcvtnu, Conversions)

2458 DEFINE_TEST_NEON_2SAME_FP(fcvtmu, Conversions)

2459 DEFINE_TEST_NEON_2SAME_FP(fcvtau, Conversions)

2460 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.

2461 DEFINE_TEST_NEON_2SAME_8B_16B(not_, Basic)

2462 DEFINE_TEST_NEON_2SAME_8B_16B(rbit, Basic)

2463 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmge, Basic, Zero)

2464 DEFINE_TEST_NEON_2OPIMM_FCMP_ZERO(fcmle, Basic, Zero)

2465 DEFINE_TEST_NEON_2SAME_FP(fneg, Basic)

2466 DEFINE_TEST_NEON_2SAME_FP(frinti, Conversions)

2467 DEFINE_TEST_NEON_2SAME_FP(fcvtpu, Conversions)

2468 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.

2469 DEFINE_TEST_NEON_2SAME_2S_4S(ursqrte, Basic)

2470 DEFINE_TEST_NEON_2SAME_FP(frsqrte, Basic)

2471 DEFINE_TEST_NEON_2SAME_FP(fsqrt, Basic)

2472

2473 // Advanced SIMD scalar two-register miscellaneous.
// Scalar entries use the *_SCALAR macro variants. The integer compare-with-
// zero forms go through 2OPIMM_SCALAR_D and the floating-point ones through
// 2OPIMM_FP_SCALAR_SD, both with the Zero immediate set.

2474 DEFINE_TEST_NEON_2SAME_SCALAR(suqadd, Basic)

2475 DEFINE_TEST_NEON_2SAME_SCALAR(sqabs, Basic)

2476 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmgt, Basic, Zero)

2477 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmeq, Basic, Zero)

2478 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmlt, Basic, Zero)

2479 DEFINE_TEST_NEON_2SAME_SCALAR_D(abs, Basic)

2480 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtn, Basic)

2481 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtns, Conversions)

2482 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtms, Conversions)

2483 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtas, Conversions)

2484 // SCVTF (vector, integer) covered by SCVTF(vector, fixed point) with fbits 0.

2485 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmgt, Basic, Zero)

2486 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmeq, Basic, Zero)

2487 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmlt, Basic, Zero)

2488 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtps, Conversions)

2489 // FCVTZS(vector, integer) covered by FCVTZS(vector, fixed point) with fbits 0.

2490 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpe, Basic)

2491 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frecpx, Basic)

2492 DEFINE_TEST_NEON_2SAME_SCALAR(usqadd, Basic)

2493 DEFINE_TEST_NEON_2SAME_SCALAR(sqneg, Basic)

2494 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmge, Basic, Zero)

2495 DEFINE_TEST_NEON_2OPIMM_SCALAR_D(cmle, Basic, Zero)

2496 DEFINE_TEST_NEON_2SAME_SCALAR_D(neg, Basic)

2497 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(sqxtun, Basic)

2498 DEFINE_TEST_NEON_2DIFF_SCALAR_NARROW(uqxtn, Basic)

// Scalar fcvtxn is written out as an explicit SIMTEST instead of going
// through one of the DEFINE_TEST_NEON_* macros used by the neighbouring
// entries: it calls the 2DIFF helper directly with the S and D formats and
// the kInputDoubleConversions input list.
// NOTE(review): presumably S is the destination and D the source format -
// confirm against the helper's parameter order.
2499 SIMTEST(fcvtxn_SCALAR) {

2500 CALL_TEST_NEON_HELPER_2DIFF(fcvtxn, S, D, kInputDoubleConversions);

2501 }

// Remaining scalar two-register miscellaneous tests; this continues the
// macro list that the hand-written fcvtxn_SCALAR test interrupts.
2502 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtnu, Conversions)

2503 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtmu, Conversions)

2504 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtau, Conversions)

2505 // UCVTF (vector, integer) covered by UCVTF(vector, fixed point) with fbits 0.

2506 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmge, Basic, Zero)

2507 DEFINE_TEST_NEON_2OPIMM_FP_SCALAR_SD(fcmle, Basic, Zero)

2508 DEFINE_TEST_NEON_2SAME_FP_SCALAR(fcvtpu, Conversions)

2509 // FCVTZU(vector, integer) covered by FCVTZU(vector, fixed point) with fbits 0.

2510 DEFINE_TEST_NEON_2SAME_FP_SCALAR(frsqrte, Basic)

2511

2512 // Advanced SIMD across lanes.
// Three macro variants are used here: ACROSS_LONG for saddlv / uaddlv,
// ACROSS for the other integer reductions, and ACROSS_FP for the
// floating-point min/max reductions. All use the Basic input set.

2513 DEFINE_TEST_NEON_ACROSS_LONG(saddlv, Basic)

2514 DEFINE_TEST_NEON_ACROSS(smaxv, Basic)

2515 DEFINE_TEST_NEON_ACROSS(sminv, Basic)

2516 DEFINE_TEST_NEON_ACROSS(addv, Basic)

2517 DEFINE_TEST_NEON_ACROSS_LONG(uaddlv, Basic)

2518 DEFINE_TEST_NEON_ACROSS(umaxv, Basic)

2519 DEFINE_TEST_NEON_ACROSS(uminv, Basic)

2520 DEFINE_TEST_NEON_ACROSS_FP(fmaxnmv, Basic)

2521 DEFINE_TEST_NEON_ACROSS_FP(fmaxv, Basic)

2522 DEFINE_TEST_NEON_ACROSS_FP(fminnmv, Basic)

2523 DEFINE_TEST_NEON_ACROSS_FP(fminv, Basic)

2524

2525 // Advanced SIMD permute.
// All six permute mnemonics are instantiated with the generic 3SAME macro
// and the Basic input set; no permute-specific macro is used here.

2526 DEFINE_TEST_NEON_3SAME(uzp1, Basic)

2527 DEFINE_TEST_NEON_3SAME(trn1, Basic)

2528 DEFINE_TEST_NEON_3SAME(zip1, Basic)

2529 DEFINE_TEST_NEON_3SAME(uzp2, Basic)

2530 DEFINE_TEST_NEON_3SAME(trn2, Basic)

2531 DEFINE_TEST_NEON_3SAME(zip2, Basic)

2532

2533 // Advanced SIMD vector x indexed element.
// The long mnemonics (smlal, smlsl, smull, umlal, umlsl, umull and the
// sqdml* forms) use BYELEMENT_DIFF, the remaining integer mnemonics use
// BYELEMENT, and the floating-point ones use FP_BYELEMENT.
// NOTE(review): every entry passes Basic three times; presumably one input
// set per register operand - confirm against the macro definitions.

2534 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlal, Basic, Basic, Basic)

2535 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlal, Basic, Basic, Basic)

2536 DEFINE_TEST_NEON_BYELEMENT_DIFF(smlsl, Basic, Basic, Basic)

2537 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmlsl, Basic, Basic, Basic)

2538 DEFINE_TEST_NEON_BYELEMENT(mul, Basic, Basic, Basic)

2539 DEFINE_TEST_NEON_BYELEMENT_DIFF(smull, Basic, Basic, Basic)

2540 DEFINE_TEST_NEON_BYELEMENT_DIFF(sqdmull, Basic, Basic, Basic)

2541 DEFINE_TEST_NEON_BYELEMENT(sqdmulh, Basic, Basic, Basic)

2542 DEFINE_TEST_NEON_BYELEMENT(sqrdmulh, Basic, Basic, Basic)

2543 DEFINE_TEST_NEON_FP_BYELEMENT(fmla, Basic, Basic, Basic)

2544 DEFINE_TEST_NEON_FP_BYELEMENT(fmls, Basic, Basic, Basic)

2545 DEFINE_TEST_NEON_FP_BYELEMENT(fmul, Basic, Basic, Basic)

2546 DEFINE_TEST_NEON_BYELEMENT(mla, Basic, Basic, Basic)

2547 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlal, Basic, Basic, Basic)

2548 DEFINE_TEST_NEON_BYELEMENT(mls, Basic, Basic, Basic)

2549 DEFINE_TEST_NEON_BYELEMENT_DIFF(umlsl, Basic, Basic, Basic)

2550 DEFINE_TEST_NEON_BYELEMENT_DIFF(umull, Basic, Basic, Basic)

2551 DEFINE_TEST_NEON_FP_BYELEMENT(fmulx, Basic, Basic, Basic)

2552

2553 // Advanced SIMD scalar x indexed element.
// Scalar counterparts of the by-element tests: the sqdml* long forms use
// BYELEMENT_DIFF_SCALAR, sqdmulh / sqrdmulh use BYELEMENT_SCALAR, and the
// floating-point forms use FP_BYELEMENT_SCALAR.

2554 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlal, Basic, Basic, Basic)

2555 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmlsl, Basic, Basic, Basic)

2556 DEFINE_TEST_NEON_BYELEMENT_DIFF_SCALAR(sqdmull, Basic, Basic, Basic)

2557 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqdmulh, Basic, Basic, Basic)

2558 DEFINE_TEST_NEON_BYELEMENT_SCALAR(sqrdmulh, Basic, Basic, Basic)

2559 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmla, Basic, Basic, Basic)

2560 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmls, Basic, Basic, Basic)

2561 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmul, Basic, Basic, Basic)

2562 DEFINE_TEST_NEON_FP_BYELEMENT_SCALAR(fmulx, Basic, Basic, Basic)

OLD	NEW

« no previous file with comments | « test/cctest/test-disasm-arm64.cc ('k') | test/cctest/test-simulator-neon-inputs-arm64.h » ('j') | no next file with comments »