Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(235)

Side by Side Diff: src/arm64/logic-arm64.cc

Issue 2622643005: ARM64: Add NEON support (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include <cmath>
8 #include "src/arm64/simulator-arm64.h"
bbudge 2017/01/31 01:41:32 Did you pull this out of simulator-arm64.cc becaus
martyn.capewell 2017/02/03 11:01:31 This file contains the more complicated simulator
bbudge 2017/02/08 01:39:11 That's fine.
martyn.capewell 2017/02/15 11:51:00 Done.
9
10 namespace v8 {
11 namespace internal {
12
13 #if defined(USE_SIMULATOR)
14
15 template <>
16 double Simulator::FPDefaultNaN<double>() {
17 return kFP64DefaultNaN;
18 }
bbudge 2017/01/31 01:41:32 Why not define these inline in the header, simulat
martyn.capewell 2017/02/03 11:01:31 Done.
19
20 template <>
21 float Simulator::FPDefaultNaN<float>() {
22 return kFP32DefaultNaN;
23 }
24
25 namespace {
26
27 // See FPRound for a description of this function.
28 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
29 FPRounding round_mode) {
30 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
31 sign, exponent, mantissa, round_mode);
32 return bit_cast<double>(bits);
33 }
34
35 // See FPRound for a description of this function.
36 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
37 FPRounding round_mode) {
38 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
39 sign, exponent, mantissa, round_mode);
40 return bit_cast<float>(bits);
41 }
42
43 // See FPRound for a description of this function.
44 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
45 uint64_t mantissa, FPRounding round_mode) {
46 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
47 sign, exponent, mantissa, round_mode);
48 }
49
50 } // anonymous namespace
bbudge 2017/01/31 01:41:32 nit: just 'namespace'
martyn.capewell 2017/02/03 11:01:31 Done.
51
52 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
53 if (src >= 0) {
54 return UFixedToDouble(src, fbits, round);
55 } else if (src == INT64_MIN) {
56 return -UFixedToDouble(src, fbits, round);
57 } else {
58 return -UFixedToDouble(-src, fbits, round);
59 }
60 }
61
62 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
63 // An input of 0 is a special case because the result is effectively
64 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
65 if (src == 0) {
66 return 0.0;
67 }
68
69 // Calculate the exponent. The highest significant bit will have the value
70 // 2^exponent.
71 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
72 const int64_t exponent = highest_significant_bit - fbits;
73
74 return FPRoundToDouble(0, exponent, src, round);
75 }
76
77 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
78 if (src >= 0) {
79 return UFixedToFloat(src, fbits, round);
80 } else if (src == INT64_MIN) {
81 return -UFixedToFloat(src, fbits, round);
82 } else {
83 return -UFixedToFloat(-src, fbits, round);
84 }
85 }
86
87 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
88 // An input of 0 is a special case because the result is effectively
89 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
90 if (src == 0) {
91 return 0.0f;
92 }
93
94 // Calculate the exponent. The highest significant bit will have the value
95 // 2^exponent.
96 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
97 const int32_t exponent = highest_significant_bit - fbits;
98
99 return FPRoundToFloat(0, exponent, src, round);
100 }
101
102 double Simulator::FPToDouble(float value) {
103 switch (std::fpclassify(value)) {
104 case FP_NAN: {
105 if (IsSignallingNaN(value)) {
106 FPProcessException();
107 }
108 if (DN()) return kFP64DefaultNaN;
109
110 // Convert NaNs as the processor would:
111 // - The sign is propagated.
112 // - The payload (mantissa) is transferred entirely, except that the top
bbudge 2017/01/31 01:41:32 s/payload/mantissa ?
martyn.capewell 2017/02/03 11:01:31 Not sure what you mean here. In the context of a N
bbudge 2017/02/08 01:39:11 For consistency. It looks like 'mantissa' is used
martyn.capewell 2017/02/15 11:51:00 Done.
113 // bit is forced to '1', making the result a quiet NaN. The unused
114 // (low-order) payload bits are set to 0.
115 uint32_t raw = bit_cast<uint32_t>(value);
116
117 uint64_t sign = raw >> 31;
118 uint64_t exponent = (1 << kDoubleExponentBits) - 1;
119 uint64_t payload = unsigned_bitextract_64(21, 0, raw);
120
121 // Unused low-order bits remain zero.
122 payload <<= (kDoubleMantissaBits - kFloatMantissaBits);
123
124 // Force a quiet NaN.
125 payload |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
126
127 return double_pack(sign, exponent, payload);
128 }
129
130 case FP_ZERO:
131 case FP_NORMAL:
132 case FP_SUBNORMAL:
133 case FP_INFINITE: {
134 // All other inputs are preserved in a standard cast, because every value
135 // representable using an IEEE-754 float is also representable using an
136 // IEEE-754 double.
137 return static_cast<double>(value);
138 }
139 }
140
141 UNREACHABLE();
142 return kFP64DefaultNaN;
143 }
144
145 float Simulator::FPToFloat(float16 value) {
146 uint32_t sign = value >> 15;
147 uint32_t exponent =
148 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
149 kFloat16MantissaBits, value);
150 uint32_t mantissa =
151 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
152
153 switch (float16classify(value)) {
154 case FP_ZERO:
155 return (sign == 0) ? 0.0f : -0.0f;
156
157 case FP_INFINITE:
158 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
159
160 case FP_SUBNORMAL: {
161 // Calculate shift required to put mantissa into the most-significant bits
162 // of the destination mantissa.
163 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
164
165 // Shift mantissa and discard implicit '1'.
166 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
167 mantissa &= (1 << kFloatMantissaBits) - 1;
168
169 // Adjust the exponent for the shift applied, and rebias.
170 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
171 break;
172 }
173
174 case FP_NAN: {
175 if (IsSignallingNaN(value)) {
176 FPProcessException();
177 }
178 if (DN()) return kFP32DefaultNaN;
179
180 // Convert NaNs as the processor would:
181 // - The sign is propagated.
182 // - The payload (mantissa) is transferred entirely, except that the top
183 // bit is forced to '1', making the result a quiet NaN. The unused
184 // (low-order) payload bits are set to 0.
185 exponent = (1 << kFloatExponentBits) - 1;
186
187 // Increase bits in mantissa, making low-order bits 0.
188 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
189 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.
190 break;
191 }
192
193 case FP_NORMAL: {
194 // Increase bits in mantissa, making low-order bits 0.
195 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
196
197 // Change exponent bias.
198 exponent += (kFloatExponentBias - kFloat16ExponentBias);
199 break;
200 }
201
202 default:
203 UNREACHABLE();
204 return kFP32DefaultNaN;
205 }
206 return float_pack(sign, exponent, mantissa);
207 }
208
209 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
210 // Only the FPTieEven rounding mode is implemented.
211 DCHECK_EQ(round_mode, FPTieEven);
212 USE(round_mode);
213
214 int64_t sign = float_sign(value);
215 int64_t exponent =
216 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
217 uint32_t mantissa = float_mantissa(value);
218
219 switch (std::fpclassify(value)) {
220 case FP_NAN: {
221 if (IsSignallingNaN(value)) {
222 FPProcessException();
223 }
224 if (DN()) return kFP16DefaultNaN;
225
226 // Convert NaNs as the processor would:
227 // - The sign is propagated.
228 // - The payload (mantissa) is transferred as much as possible, except
229 // that the top bit is forced to '1', making the result a quiet NaN.
230 float16 result =
231 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
232 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
233 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
234 return result;
235 }
236
237 case FP_ZERO:
238 return (sign == 0) ? 0 : 0x8000;
239
240 case FP_INFINITE:
241 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
242
243 case FP_NORMAL:
244 case FP_SUBNORMAL: {
245 // Convert float-to-half as the processor would, assuming that FPCR.FZ
246 // (flush-to-zero) is not set.
247
248 // Add the implicit '1' bit to the mantissa.
249 mantissa += (1 << kFloatMantissaBits);
250 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
251 }
252 }
253
254 UNREACHABLE();
255 return kFP16DefaultNaN;
256 }
257
258 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
259 // Only the FPTieEven rounding mode is implemented.
260 DCHECK_EQ(round_mode, FPTieEven);
261 USE(round_mode);
262
263 int64_t sign = double_sign(value);
264 int64_t exponent =
265 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
266 uint64_t mantissa = double_mantissa(value);
267
268 switch (std::fpclassify(value)) {
269 case FP_NAN: {
270 if (IsSignallingNaN(value)) {
271 FPProcessException();
272 }
273 if (DN()) return kFP16DefaultNaN;
274
275 // Convert NaNs as the processor would:
276 // - The sign is propagated.
277 // - The payload (mantissa) is transferred as much as possible, except
278 // that the top bit is forced to '1', making the result a quiet NaN.
279 float16 result =
280 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
281 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
282 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
283 return result;
284 }
285
286 case FP_ZERO:
287 return (sign == 0) ? 0 : 0x8000;
288
289 case FP_INFINITE:
290 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
291
292 case FP_NORMAL:
293 case FP_SUBNORMAL: {
294 // Convert double-to-half as the processor would, assuming that FPCR.FZ
295 // (flush-to-zero) is not set.
296
297 // Add the implicit '1' bit to the mantissa.
298 mantissa += (UINT64_C(1) << kDoubleMantissaBits);
299 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
300 }
301 }
302
303 UNREACHABLE();
304 return kFP16DefaultNaN;
305 }
306
307 float Simulator::FPToFloat(double value, FPRounding round_mode) {
308 // Only the FPTieEven rounding mode is implemented.
309 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
310 USE(round_mode);
311
312 switch (std::fpclassify(value)) {
313 case FP_NAN: {
314 if (IsSignallingNaN(value)) {
315 FPProcessException();
316 }
317 if (DN()) return kFP32DefaultNaN;
318
319 // Convert NaNs as the processor would:
320 // - The sign is propagated.
321 // - The payload (mantissa) is transferred as much as possible, except
322 // that the top bit is forced to '1', making the result a quiet NaN.
bbudge 2017/01/31 01:41:32 s/payload/mantissa
martyn.capewell 2017/02/15 11:51:02 Done.
323
324 uint64_t raw = bit_cast<uint64_t>(value);
325
326 uint32_t sign = raw >> 63;
327 uint32_t exponent = (1 << 8) - 1;
328 uint32_t payload = static_cast<uint32_t>(unsigned_bitextract_64(
329 50, kDoubleMantissaBits - kFloatMantissaBits, raw));
330 payload |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.
331
332 return float_pack(sign, exponent, payload);
333 }
334
335 case FP_ZERO:
336 case FP_INFINITE: {
337 // In a C++ cast, any value representable in the target type will be
338 // unchanged. This is always the case for +/-0.0 and infinities.
339 return static_cast<float>(value);
340 }
341
342 case FP_NORMAL:
343 case FP_SUBNORMAL: {
344 // Convert double-to-float as the processor would, assuming that FPCR.FZ
345 // (flush-to-zero) is not set.
346 uint32_t sign = double_sign(value);
347 int64_t exponent =
348 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
349 uint64_t mantissa = double_mantissa(value);
350 if (std::fpclassify(value) == FP_NORMAL) {
351 // For normal FP values, add the hidden bit.
352 mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
353 }
354 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
355 }
356 }
357
358 UNREACHABLE();
359 return kFP32DefaultNaN;
360 }
361
362 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
363 dst.ClearForWrite(vform);
364 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
365 dst.ReadUintFromMem(vform, i, addr);
366 addr += LaneSizeInBytesFromFormat(vform);
367 }
368 }
369
370 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
371 uint64_t addr) {
372 dst.ReadUintFromMem(vform, index, addr);
373 }
374
375 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
376 dst.ClearForWrite(vform);
377 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
378 dst.ReadUintFromMem(vform, i, addr);
379 }
380 }
381
382 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
383 LogicVRegister dst2, uint64_t addr1) {
384 dst1.ClearForWrite(vform);
385 dst2.ClearForWrite(vform);
386 int esize = LaneSizeInBytesFromFormat(vform);
387 uint64_t addr2 = addr1 + esize;
388 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
389 dst1.ReadUintFromMem(vform, i, addr1);
390 dst2.ReadUintFromMem(vform, i, addr2);
391 addr1 += 2 * esize;
392 addr2 += 2 * esize;
393 }
394 }
395
396 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
397 LogicVRegister dst2, int index, uint64_t addr1) {
398 dst1.ClearForWrite(vform);
399 dst2.ClearForWrite(vform);
400 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
401 dst1.ReadUintFromMem(vform, index, addr1);
402 dst2.ReadUintFromMem(vform, index, addr2);
403 }
404
405 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
406 LogicVRegister dst2, uint64_t addr) {
407 dst1.ClearForWrite(vform);
408 dst2.ClearForWrite(vform);
409 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
410 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
411 dst1.ReadUintFromMem(vform, i, addr);
412 dst2.ReadUintFromMem(vform, i, addr2);
413 }
414 }
415
416 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
417 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
418 dst1.ClearForWrite(vform);
419 dst2.ClearForWrite(vform);
420 dst3.ClearForWrite(vform);
421 int esize = LaneSizeInBytesFromFormat(vform);
422 uint64_t addr2 = addr1 + esize;
423 uint64_t addr3 = addr2 + esize;
424 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
425 dst1.ReadUintFromMem(vform, i, addr1);
426 dst2.ReadUintFromMem(vform, i, addr2);
427 dst3.ReadUintFromMem(vform, i, addr3);
428 addr1 += 3 * esize;
429 addr2 += 3 * esize;
430 addr3 += 3 * esize;
431 }
432 }
433
434 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
435 LogicVRegister dst2, LogicVRegister dst3, int index,
436 uint64_t addr1) {
437 dst1.ClearForWrite(vform);
438 dst2.ClearForWrite(vform);
439 dst3.ClearForWrite(vform);
440 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
441 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
442 dst1.ReadUintFromMem(vform, index, addr1);
443 dst2.ReadUintFromMem(vform, index, addr2);
444 dst3.ReadUintFromMem(vform, index, addr3);
445 }
446
447 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
448 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
449 dst1.ClearForWrite(vform);
450 dst2.ClearForWrite(vform);
451 dst3.ClearForWrite(vform);
452 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
453 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
454 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
455 dst1.ReadUintFromMem(vform, i, addr);
456 dst2.ReadUintFromMem(vform, i, addr2);
457 dst3.ReadUintFromMem(vform, i, addr3);
458 }
459 }
460
461 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
462 LogicVRegister dst2, LogicVRegister dst3,
463 LogicVRegister dst4, uint64_t addr1) {
464 dst1.ClearForWrite(vform);
465 dst2.ClearForWrite(vform);
466 dst3.ClearForWrite(vform);
467 dst4.ClearForWrite(vform);
468 int esize = LaneSizeInBytesFromFormat(vform);
469 uint64_t addr2 = addr1 + esize;
470 uint64_t addr3 = addr2 + esize;
471 uint64_t addr4 = addr3 + esize;
472 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
473 dst1.ReadUintFromMem(vform, i, addr1);
474 dst2.ReadUintFromMem(vform, i, addr2);
475 dst3.ReadUintFromMem(vform, i, addr3);
476 dst4.ReadUintFromMem(vform, i, addr4);
477 addr1 += 4 * esize;
478 addr2 += 4 * esize;
479 addr3 += 4 * esize;
480 addr4 += 4 * esize;
481 }
482 }
483
484 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
485 LogicVRegister dst2, LogicVRegister dst3,
486 LogicVRegister dst4, int index, uint64_t addr1) {
487 dst1.ClearForWrite(vform);
488 dst2.ClearForWrite(vform);
489 dst3.ClearForWrite(vform);
490 dst4.ClearForWrite(vform);
491 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
492 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
493 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
494 dst1.ReadUintFromMem(vform, index, addr1);
495 dst2.ReadUintFromMem(vform, index, addr2);
496 dst3.ReadUintFromMem(vform, index, addr3);
497 dst4.ReadUintFromMem(vform, index, addr4);
498 }
499
500 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
501 LogicVRegister dst2, LogicVRegister dst3,
502 LogicVRegister dst4, uint64_t addr) {
503 dst1.ClearForWrite(vform);
504 dst2.ClearForWrite(vform);
505 dst3.ClearForWrite(vform);
506 dst4.ClearForWrite(vform);
507 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
508 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
509 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
510 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
511 dst1.ReadUintFromMem(vform, i, addr);
512 dst2.ReadUintFromMem(vform, i, addr2);
513 dst3.ReadUintFromMem(vform, i, addr3);
514 dst4.ReadUintFromMem(vform, i, addr4);
515 }
516 }
517
518 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
519 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
520 src.WriteUintToMem(vform, i, addr);
521 addr += LaneSizeInBytesFromFormat(vform);
522 }
523 }
524
525 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
526 uint64_t addr) {
527 src.WriteUintToMem(vform, index, addr);
528 }
529
530 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
531 uint64_t addr) {
532 int esize = LaneSizeInBytesFromFormat(vform);
533 uint64_t addr2 = addr + esize;
534 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
535 dst.WriteUintToMem(vform, i, addr);
536 dst2.WriteUintToMem(vform, i, addr2);
537 addr += 2 * esize;
538 addr2 += 2 * esize;
539 }
540 }
541
542 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
543 int index, uint64_t addr) {
544 int esize = LaneSizeInBytesFromFormat(vform);
545 dst.WriteUintToMem(vform, index, addr);
546 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
547 }
548
549 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
550 LogicVRegister dst3, uint64_t addr) {
551 int esize = LaneSizeInBytesFromFormat(vform);
552 uint64_t addr2 = addr + esize;
553 uint64_t addr3 = addr2 + esize;
554 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
555 dst.WriteUintToMem(vform, i, addr);
556 dst2.WriteUintToMem(vform, i, addr2);
557 dst3.WriteUintToMem(vform, i, addr3);
558 addr += 3 * esize;
559 addr2 += 3 * esize;
560 addr3 += 3 * esize;
561 }
562 }
563
564 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
565 LogicVRegister dst3, int index, uint64_t addr) {
566 int esize = LaneSizeInBytesFromFormat(vform);
567 dst.WriteUintToMem(vform, index, addr);
568 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
569 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
570 }
571
572 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
573 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
574 int esize = LaneSizeInBytesFromFormat(vform);
575 uint64_t addr2 = addr + esize;
576 uint64_t addr3 = addr2 + esize;
577 uint64_t addr4 = addr3 + esize;
578 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
579 dst.WriteUintToMem(vform, i, addr);
580 dst2.WriteUintToMem(vform, i, addr2);
581 dst3.WriteUintToMem(vform, i, addr3);
582 dst4.WriteUintToMem(vform, i, addr4);
583 addr += 4 * esize;
584 addr2 += 4 * esize;
585 addr3 += 4 * esize;
586 addr4 += 4 * esize;
587 }
588 }
589
590 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
591 LogicVRegister dst3, LogicVRegister dst4, int index,
592 uint64_t addr) {
593 int esize = LaneSizeInBytesFromFormat(vform);
594 dst.WriteUintToMem(vform, index, addr);
595 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
596 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
597 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
598 }
599
600 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
601 const LogicVRegister& src1,
602 const LogicVRegister& src2, Condition cond) {
603 dst.ClearForWrite(vform);
604 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
605 int64_t sa = src1.Int(vform, i);
606 int64_t sb = src2.Int(vform, i);
607 uint64_t ua = src1.Uint(vform, i);
608 uint64_t ub = src2.Uint(vform, i);
609 bool result = false;
610 switch (cond) {
611 case eq:
612 result = (ua == ub);
613 break;
614 case ge:
615 result = (sa >= sb);
616 break;
617 case gt:
618 result = (sa > sb);
619 break;
620 case hi:
621 result = (ua > ub);
622 break;
623 case hs:
624 result = (ua >= ub);
625 break;
626 case lt:
627 result = (sa < sb);
628 break;
629 case le:
630 result = (sa <= sb);
631 break;
632 default:
633 UNREACHABLE();
634 break;
635 }
636 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
637 }
638 return dst;
639 }
640
641 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
642 const LogicVRegister& src1, int imm,
643 Condition cond) {
644 SimVRegister temp;
645 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
646 return cmp(vform, dst, src1, imm_reg, cond);
647 }
648
649 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
650 const LogicVRegister& src1,
651 const LogicVRegister& src2) {
652 dst.ClearForWrite(vform);
653 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
654 uint64_t ua = src1.Uint(vform, i);
655 uint64_t ub = src2.Uint(vform, i);
656 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
657 }
658 return dst;
659 }
660
661 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
662 const LogicVRegister& src1,
663 const LogicVRegister& src2) {
664 int lane_size = LaneSizeInBitsFromFormat(vform);
665 dst.ClearForWrite(vform);
666 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
667 // Test for unsigned saturation.
668 uint64_t ua = src1.UintLeftJustified(vform, i);
669 uint64_t ub = src2.UintLeftJustified(vform, i);
670 uint64_t ur = ua + ub;
671 if (ur < ua) {
672 dst.SetUnsignedSat(i, true);
673 }
674
675 // Test for signed saturation.
676 bool pos_a = (ua >> 63) == 0;
677 bool pos_b = (ub >> 63) == 0;
678 bool pos_r = (ur >> 63) == 0;
679 // If the signs of the operands are the same, but different from the result,
680 // there was an overflow.
681 if ((pos_a == pos_b) && (pos_a != pos_r)) {
682 dst.SetSignedSat(i, pos_a);
683 }
684
685 dst.SetInt(vform, i, ur >> (64 - lane_size));
686 }
687 return dst;
688 }
689
690 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
691 const LogicVRegister& src1,
692 const LogicVRegister& src2) {
693 SimVRegister temp1, temp2;
694 uzp1(vform, temp1, src1, src2);
695 uzp2(vform, temp2, src1, src2);
696 add(vform, dst, temp1, temp2);
697 return dst;
698 }
699
700 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
701 const LogicVRegister& src1,
702 const LogicVRegister& src2) {
703 SimVRegister temp;
704 mul(vform, temp, src1, src2);
705 add(vform, dst, dst, temp);
706 return dst;
707 }
708
709 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
710 const LogicVRegister& src1,
711 const LogicVRegister& src2) {
712 SimVRegister temp;
713 mul(vform, temp, src1, src2);
714 sub(vform, dst, dst, temp);
715 return dst;
716 }
717
718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
719 const LogicVRegister& src1,
720 const LogicVRegister& src2) {
721 dst.ClearForWrite(vform);
722 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
723 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
724 }
725 return dst;
726 }
727
728 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
729 const LogicVRegister& src1,
730 const LogicVRegister& src2, int index) {
731 SimVRegister temp;
732 VectorFormat indexform = VectorFormatFillQ(vform);
733 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
734 }
735
736 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
737 const LogicVRegister& src1,
738 const LogicVRegister& src2, int index) {
739 SimVRegister temp;
740 VectorFormat indexform = VectorFormatFillQ(vform);
741 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
742 }
743
744 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
745 const LogicVRegister& src1,
746 const LogicVRegister& src2, int index) {
747 SimVRegister temp;
748 VectorFormat indexform = VectorFormatFillQ(vform);
749 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
750 }
751
752 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
753 const LogicVRegister& src1,
754 const LogicVRegister& src2, int index) {
755 SimVRegister temp;
756 VectorFormat indexform =
757 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
758 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
759 }
760
761 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
762 const LogicVRegister& src1,
763 const LogicVRegister& src2, int index) {
764 SimVRegister temp;
765 VectorFormat indexform =
766 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
767 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
768 }
769
770 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
771 const LogicVRegister& src1,
772 const LogicVRegister& src2, int index) {
773 SimVRegister temp;
774 VectorFormat indexform =
775 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
776 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
777 }
778
779 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
780 const LogicVRegister& src1,
781 const LogicVRegister& src2, int index) {
782 SimVRegister temp;
783 VectorFormat indexform =
784 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
785 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
786 }
787
788 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
789 const LogicVRegister& src1,
790 const LogicVRegister& src2, int index) {
791 SimVRegister temp;
792 VectorFormat indexform =
793 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
794 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
795 }
796
797 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
798 const LogicVRegister& src1,
799 const LogicVRegister& src2, int index) {
800 SimVRegister temp;
801 VectorFormat indexform =
802 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
803 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
804 }
805
806 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
807 const LogicVRegister& src1,
808 const LogicVRegister& src2, int index) {
809 SimVRegister temp;
810 VectorFormat indexform =
811 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
812 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
813 }
814
815 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
816 const LogicVRegister& src1,
817 const LogicVRegister& src2, int index) {
818 SimVRegister temp;
819 VectorFormat indexform =
820 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
821 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
822 }
823
824 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
825 const LogicVRegister& src1,
826 const LogicVRegister& src2, int index) {
827 SimVRegister temp;
828 VectorFormat indexform =
829 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
830 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
831 }
832
833 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
834 const LogicVRegister& src1,
835 const LogicVRegister& src2, int index) {
836 SimVRegister temp;
837 VectorFormat indexform =
838 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
839 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
840 }
841
842 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
843 const LogicVRegister& src1,
844 const LogicVRegister& src2, int index) {
845 SimVRegister temp;
846 VectorFormat indexform =
847 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
848 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
849 }
850
851 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
852 const LogicVRegister& src1,
853 const LogicVRegister& src2, int index) {
854 SimVRegister temp;
855 VectorFormat indexform =
856 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
857 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
858 }
859
860 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
861 const LogicVRegister& src1,
862 const LogicVRegister& src2, int index) {
863 SimVRegister temp;
864 VectorFormat indexform =
865 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
866 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
867 }
868
869 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
870 const LogicVRegister& src1,
871 const LogicVRegister& src2, int index) {
872 SimVRegister temp;
873 VectorFormat indexform =
874 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
875 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
876 }
877
878 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
879 const LogicVRegister& src1,
880 const LogicVRegister& src2, int index) {
881 SimVRegister temp;
882 VectorFormat indexform =
883 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
884 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
885 }
886
887 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
888 const LogicVRegister& src1,
889 const LogicVRegister& src2, int index) {
890 SimVRegister temp;
891 VectorFormat indexform =
892 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
893 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
894 }
895
896 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
897 const LogicVRegister& src1,
898 const LogicVRegister& src2, int index) {
899 SimVRegister temp;
900 VectorFormat indexform =
901 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
902 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
903 }
904
905 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
906 const LogicVRegister& src1,
907 const LogicVRegister& src2, int index) {
908 SimVRegister temp;
909 VectorFormat indexform =
910 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
911 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913
914 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
915 const LogicVRegister& src1,
916 const LogicVRegister& src2, int index) {
917 SimVRegister temp;
918 VectorFormat indexform = VectorFormatFillQ(vform);
919 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
920 }
921
922 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
923 const LogicVRegister& src1,
924 const LogicVRegister& src2, int index) {
925 SimVRegister temp;
926 VectorFormat indexform = VectorFormatFillQ(vform);
927 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
928 }
929
930 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
931 uint16_t result = 0;
932 uint16_t extended_op2 = op2;
933 for (int i = 0; i < 8; ++i) {
934 if ((op1 >> i) & 1) {
935 result = result ^ (extended_op2 << i);
936 }
937 }
938 return result;
939 }
940
941 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
942 const LogicVRegister& src1,
943 const LogicVRegister& src2) {
944 dst.ClearForWrite(vform);
945 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
946 dst.SetUint(vform, i,
947 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
948 }
949 return dst;
950 }
951
952 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
953 const LogicVRegister& src1,
954 const LogicVRegister& src2) {
955 VectorFormat vform_src = VectorFormatHalfWidth(vform);
956 dst.ClearForWrite(vform);
957 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
958 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
959 src2.Uint(vform_src, i)));
960 }
961 return dst;
962 }
963
964 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
965 const LogicVRegister& src1,
966 const LogicVRegister& src2) {
967 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
968 dst.ClearForWrite(vform);
969 int lane_count = LaneCountFromFormat(vform);
970 for (int i = 0; i < lane_count; i++) {
971 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
972 src2.Uint(vform_src, lane_count + i)));
973 }
974 return dst;
975 }
976
977 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
978 const LogicVRegister& src1,
979 const LogicVRegister& src2) {
980 int lane_size = LaneSizeInBitsFromFormat(vform);
981 dst.ClearForWrite(vform);
982 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
983 // Test for unsigned saturation.
984 uint64_t ua = src1.UintLeftJustified(vform, i);
985 uint64_t ub = src2.UintLeftJustified(vform, i);
986 uint64_t ur = ua - ub;
987 if (ub > ua) {
988 dst.SetUnsignedSat(i, false);
989 }
990
991 // Test for signed saturation.
992 bool pos_a = (ua >> 63) == 0;
993 bool pos_b = (ub >> 63) == 0;
994 bool pos_r = (ur >> 63) == 0;
995 // If the signs of the operands are different, and the sign of the first
996 // operand doesn't match the result, there was an overflow.
997 if ((pos_a != pos_b) && (pos_a != pos_r)) {
998 dst.SetSignedSat(i, pos_a);
999 }
1000
1001 dst.SetInt(vform, i, ur >> (64 - lane_size));
1002 }
1003 return dst;
1004 }
1005
1006 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
1007 const LogicVRegister& src1,
1008 const LogicVRegister& src2) {
1009 dst.ClearForWrite(vform);
1010 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1011 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1012 }
1013 return dst;
1014 }
1015
1016 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1017 const LogicVRegister& src1,
1018 const LogicVRegister& src2) {
1019 dst.ClearForWrite(vform);
1020 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1021 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1022 }
1023 return dst;
1024 }
1025
1026 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1027 const LogicVRegister& src1,
1028 const LogicVRegister& src2) {
1029 dst.ClearForWrite(vform);
1030 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1031 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1032 }
1033 return dst;
1034 }
1035
1036 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1037 const LogicVRegister& src1,
1038 const LogicVRegister& src2) {
1039 dst.ClearForWrite(vform);
1040 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1041 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1042 }
1043 return dst;
1044 }
1045
1046 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1047 const LogicVRegister& src1,
1048 const LogicVRegister& src2) {
1049 dst.ClearForWrite(vform);
1050 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1051 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1052 }
1053 return dst;
1054 }
1055
1056 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1057 const LogicVRegister& src, uint64_t imm) {
1058 uint64_t result[16];
1059 int laneCount = LaneCountFromFormat(vform);
1060 for (int i = 0; i < laneCount; ++i) {
1061 result[i] = src.Uint(vform, i) & ~imm;
1062 }
1063 dst.SetUintArray(vform, result);
1064 return dst;
1065 }
1066
1067 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1068 const LogicVRegister& src1,
1069 const LogicVRegister& src2) {
1070 dst.ClearForWrite(vform);
1071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1072 uint64_t operand1 = dst.Uint(vform, i);
1073 uint64_t operand2 = ~src2.Uint(vform, i);
1074 uint64_t operand3 = src1.Uint(vform, i);
1075 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1076 dst.SetUint(vform, i, result);
1077 }
1078 return dst;
1079 }
1080
1081 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1082 const LogicVRegister& src1,
1083 const LogicVRegister& src2) {
1084 dst.ClearForWrite(vform);
1085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1086 uint64_t operand1 = dst.Uint(vform, i);
1087 uint64_t operand2 = src2.Uint(vform, i);
1088 uint64_t operand3 = src1.Uint(vform, i);
1089 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1090 dst.SetUint(vform, i, result);
1091 }
1092 return dst;
1093 }
1094
1095 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1096 const LogicVRegister& src1,
1097 const LogicVRegister& src2) {
1098 dst.ClearForWrite(vform);
1099 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1100 uint64_t operand1 = src2.Uint(vform, i);
1101 uint64_t operand2 = dst.Uint(vform, i);
1102 uint64_t operand3 = src1.Uint(vform, i);
1103 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1104 dst.SetUint(vform, i, result);
1105 }
1106 return dst;
1107 }
1108
1109 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1110 const LogicVRegister& src1,
1111 const LogicVRegister& src2, bool max) {
1112 dst.ClearForWrite(vform);
1113 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1114 int64_t src1_val = src1.Int(vform, i);
1115 int64_t src2_val = src2.Int(vform, i);
1116 int64_t dst_val;
1117 if (max) {
1118 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1119 } else {
1120 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1121 }
1122 dst.SetInt(vform, i, dst_val);
1123 }
1124 return dst;
1125 }
1126
1127 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1128 const LogicVRegister& src1,
1129 const LogicVRegister& src2) {
1130 return SMinMax(vform, dst, src1, src2, true);
1131 }
1132
1133 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1134 const LogicVRegister& src1,
1135 const LogicVRegister& src2) {
1136 return SMinMax(vform, dst, src1, src2, false);
1137 }
1138
1139 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1140 const LogicVRegister& src1,
1141 const LogicVRegister& src2, bool max) {
1142 int lanes = LaneCountFromFormat(vform);
1143 int64_t result[kMaxLanesPerVector];
1144 const LogicVRegister* src = &src1;
1145 for (int j = 0; j < 2; j++) {
1146 for (int i = 0; i < lanes; i += 2) {
1147 int64_t first_val = src->Int(vform, i);
1148 int64_t second_val = src->Int(vform, i + 1);
1149 int64_t dst_val;
1150 if (max) {
1151 dst_val = (first_val > second_val) ? first_val : second_val;
1152 } else {
1153 dst_val = (first_val < second_val) ? first_val : second_val;
1154 }
1155 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1156 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1157 }
1158 src = &src2;
1159 }
1160 dst.SetIntArray(vform, result);
1161 return dst;
1162 }
1163
1164 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1165 const LogicVRegister& src1,
1166 const LogicVRegister& src2) {
1167 return SMinMaxP(vform, dst, src1, src2, true);
1168 }
1169
1170 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1171 const LogicVRegister& src1,
1172 const LogicVRegister& src2) {
1173 return SMinMaxP(vform, dst, src1, src2, false);
1174 }
1175
1176 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1177 const LogicVRegister& src) {
1178 DCHECK_EQ(vform, kFormatD);
1179
1180 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1181 dst.ClearForWrite(vform);
1182 dst.SetUint(vform, 0, dst_val);
1183 return dst;
1184 }
1185
1186 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1187 const LogicVRegister& src) {
1188 VectorFormat vform_dst =
1189 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1190
1191 int64_t dst_val = 0;
1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1193 dst_val += src.Int(vform, i);
1194 }
1195
1196 dst.ClearForWrite(vform_dst);
1197 dst.SetInt(vform_dst, 0, dst_val);
1198 return dst;
1199 }
1200
1201 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1202 const LogicVRegister& src) {
1203 VectorFormat vform_dst =
1204 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1205
1206 int64_t dst_val = 0;
1207 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1208 dst_val += src.Int(vform, i);
1209 }
1210
1211 dst.ClearForWrite(vform_dst);
1212 dst.SetInt(vform_dst, 0, dst_val);
1213 return dst;
1214 }
1215
1216 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1217 const LogicVRegister& src) {
1218 VectorFormat vform_dst =
1219 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1220
1221 uint64_t dst_val = 0;
1222 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1223 dst_val += src.Uint(vform, i);
1224 }
1225
1226 dst.ClearForWrite(vform_dst);
1227 dst.SetUint(vform_dst, 0, dst_val);
1228 return dst;
1229 }
1230
1231 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1232 const LogicVRegister& src, bool max) {
1233 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1234 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1235 int64_t src_val = src.Int(vform, i);
1236 if (max) {
1237 dst_val = (src_val > dst_val) ? src_val : dst_val;
1238 } else {
1239 dst_val = (src_val < dst_val) ? src_val : dst_val;
1240 }
1241 }
1242 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1243 dst.SetInt(vform, 0, dst_val);
1244 return dst;
1245 }
1246
1247 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1248 const LogicVRegister& src) {
1249 SMinMaxV(vform, dst, src, true);
1250 return dst;
1251 }
1252
1253 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1254 const LogicVRegister& src) {
1255 SMinMaxV(vform, dst, src, false);
1256 return dst;
1257 }
1258
1259 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1260 const LogicVRegister& src1,
1261 const LogicVRegister& src2, bool max) {
1262 dst.ClearForWrite(vform);
1263 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1264 uint64_t src1_val = src1.Uint(vform, i);
1265 uint64_t src2_val = src2.Uint(vform, i);
1266 uint64_t dst_val;
1267 if (max) {
1268 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1269 } else {
1270 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1271 }
1272 dst.SetUint(vform, i, dst_val);
1273 }
1274 return dst;
1275 }
1276
1277 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1278 const LogicVRegister& src1,
1279 const LogicVRegister& src2) {
1280 return UMinMax(vform, dst, src1, src2, true);
1281 }
1282
1283 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1284 const LogicVRegister& src1,
1285 const LogicVRegister& src2) {
1286 return UMinMax(vform, dst, src1, src2, false);
1287 }
1288
1289 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1290 const LogicVRegister& src1,
1291 const LogicVRegister& src2, bool max) {
1292 int lanes = LaneCountFromFormat(vform);
1293 uint64_t result[kMaxLanesPerVector];
1294 const LogicVRegister* src = &src1;
1295 for (int j = 0; j < 2; j++) {
1296 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1297 uint64_t first_val = src->Uint(vform, i);
1298 uint64_t second_val = src->Uint(vform, i + 1);
1299 uint64_t dst_val;
1300 if (max) {
1301 dst_val = (first_val > second_val) ? first_val : second_val;
1302 } else {
1303 dst_val = (first_val < second_val) ? first_val : second_val;
1304 }
1305 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1306 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1307 }
1308 src = &src2;
1309 }
1310 dst.SetUintArray(vform, result);
1311 return dst;
1312 }
1313
1314 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1315 const LogicVRegister& src1,
1316 const LogicVRegister& src2) {
1317 return UMinMaxP(vform, dst, src1, src2, true);
1318 }
1319
1320 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1321 const LogicVRegister& src1,
1322 const LogicVRegister& src2) {
1323 return UMinMaxP(vform, dst, src1, src2, false);
1324 }
1325
1326 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1327 const LogicVRegister& src, bool max) {
1328 uint64_t dst_val = max ? 0 : UINT64_MAX;
1329 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1330 uint64_t src_val = src.Uint(vform, i);
1331 if (max) {
1332 dst_val = (src_val > dst_val) ? src_val : dst_val;
1333 } else {
1334 dst_val = (src_val < dst_val) ? src_val : dst_val;
1335 }
1336 }
1337 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1338 dst.SetUint(vform, 0, dst_val);
1339 return dst;
1340 }
1341
1342 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1343 const LogicVRegister& src) {
1344 UMinMaxV(vform, dst, src, true);
1345 return dst;
1346 }
1347
1348 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1349 const LogicVRegister& src) {
1350 UMinMaxV(vform, dst, src, false);
1351 return dst;
1352 }
1353
1354 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1355 const LogicVRegister& src, int shift) {
1356 DCHECK_GE(shift, 0);
1357 SimVRegister temp;
1358 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1359 return ushl(vform, dst, src, shiftreg);
1360 }
1361
1362 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1363 const LogicVRegister& src, int shift) {
1364 DCHECK_GE(shift, 0);
1365 SimVRegister temp1, temp2;
1366 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1367 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1368 return sshl(vform, dst, extendedreg, shiftreg);
1369 }
1370
1371 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1372 const LogicVRegister& src, int shift) {
1373 DCHECK_GE(shift, 0);
1374 SimVRegister temp1, temp2;
1375 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1376 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1377 return sshl(vform, dst, extendedreg, shiftreg);
1378 }
1379
1380 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1381 const LogicVRegister& src) {
1382 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1383 return sshll(vform, dst, src, shift);
1384 }
1385
1386 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1387 const LogicVRegister& src) {
1388 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1389 return sshll2(vform, dst, src, shift);
1390 }
1391
1392 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1393 const LogicVRegister& src, int shift) {
1394 DCHECK_GE(shift, 0);
1395 SimVRegister temp1, temp2;
1396 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1397 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1398 return ushl(vform, dst, extendedreg, shiftreg);
1399 }
1400
1401 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1402 const LogicVRegister& src, int shift) {
1403 DCHECK_GE(shift, 0);
1404 SimVRegister temp1, temp2;
1405 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1406 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1407 return ushl(vform, dst, extendedreg, shiftreg);
1408 }
1409
1410 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1411 const LogicVRegister& src, int shift) {
1412 dst.ClearForWrite(vform);
1413 int laneCount = LaneCountFromFormat(vform);
1414 for (int i = 0; i < laneCount; i++) {
1415 uint64_t src_lane = src.Uint(vform, i);
1416 uint64_t dst_lane = dst.Uint(vform, i);
1417 uint64_t shifted = src_lane << shift;
1418 uint64_t mask = MaxUintFromFormat(vform) << shift;
1419 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1420 }
1421 return dst;
1422 }
1423
1424 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1425 const LogicVRegister& src, int shift) {
1426 DCHECK_GE(shift, 0);
1427 SimVRegister temp;
1428 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1429 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1430 }
1431
1432 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1433 const LogicVRegister& src, int shift) {
1434 DCHECK_GE(shift, 0);
1435 SimVRegister temp;
1436 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1437 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1438 }
1439
1440 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1441 const LogicVRegister& src, int shift) {
1442 DCHECK_GE(shift, 0);
1443 SimVRegister temp;
1444 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1445 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1446 }
1447
1448 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1449 const LogicVRegister& src, int shift) {
1450 dst.ClearForWrite(vform);
1451 int laneCount = LaneCountFromFormat(vform);
1452 DCHECK((shift > 0) &&
1453 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1454 for (int i = 0; i < laneCount; i++) {
1455 uint64_t src_lane = src.Uint(vform, i);
1456 uint64_t dst_lane = dst.Uint(vform, i);
1457 uint64_t shifted;
1458 uint64_t mask;
1459 if (shift == 64) {
1460 shifted = 0;
1461 mask = 0;
1462 } else {
1463 shifted = src_lane >> shift;
1464 mask = MaxUintFromFormat(vform) >> shift;
1465 }
1466 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1467 }
1468 return dst;
1469 }
1470
1471 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1472 const LogicVRegister& src, int shift) {
1473 DCHECK_GE(shift, 0);
1474 SimVRegister temp;
1475 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1476 return ushl(vform, dst, src, shiftreg);
1477 }
1478
1479 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1480 const LogicVRegister& src, int shift) {
1481 DCHECK_GE(shift, 0);
1482 SimVRegister temp;
1483 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1484 return sshl(vform, dst, src, shiftreg);
1485 }
1486
1487 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1488 const LogicVRegister& src, int shift) {
1489 SimVRegister temp;
1490 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1491 return add(vform, dst, dst, shifted_reg);
1492 }
1493
1494 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1495 const LogicVRegister& src, int shift) {
1496 SimVRegister temp;
1497 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1498 return add(vform, dst, dst, shifted_reg);
1499 }
1500
1501 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1502 const LogicVRegister& src, int shift) {
1503 SimVRegister temp;
1504 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1505 return add(vform, dst, dst, shifted_reg);
1506 }
1507
1508 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1509 const LogicVRegister& src, int shift) {
1510 SimVRegister temp;
1511 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1512 return add(vform, dst, dst, shifted_reg);
1513 }
1514
1515 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1516 const LogicVRegister& src) {
1517 uint64_t result[16];
1518 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1519 int laneCount = LaneCountFromFormat(vform);
1520 for (int i = 0; i < laneCount; i++) {
1521 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1522 }
1523
1524 dst.SetUintArray(vform, result);
1525 return dst;
1526 }
1527
1528 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1529 const LogicVRegister& src) {
1530 uint64_t result[16];
1531 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1532 int laneCount = LaneCountFromFormat(vform);
1533 for (int i = 0; i < laneCount; i++) {
1534 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1535 }
1536
1537 dst.SetUintArray(vform, result);
1538 return dst;
1539 }
1540
1541 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1542 const LogicVRegister& src) {
1543 uint64_t result[16];
1544 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1545 int laneCount = LaneCountFromFormat(vform);
1546 for (int i = 0; i < laneCount; i++) {
1547 uint64_t value = src.Uint(vform, i);
1548 result[i] = 0;
1549 for (int j = 0; j < laneSizeInBits; j++) {
1550 result[i] += (value & 1);
1551 value >>= 1;
1552 }
1553 }
1554
1555 dst.SetUintArray(vform, result);
1556 return dst;
1557 }
1558
1559 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1560 const LogicVRegister& src1,
1561 const LogicVRegister& src2) {
1562 dst.ClearForWrite(vform);
1563 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1564 int8_t shift_val = src2.Int(vform, i);
1565 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1566
1567 // Set signed saturation state.
1568 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1569 (lj_src_val != 0)) {
1570 dst.SetSignedSat(i, lj_src_val >= 0);
1571 }
1572
1573 // Set unsigned saturation state.
1574 if (lj_src_val < 0) {
1575 dst.SetUnsignedSat(i, false);
1576 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1577 (lj_src_val != 0)) {
1578 dst.SetUnsignedSat(i, true);
1579 }
1580
1581 int64_t src_val = src1.Int(vform, i);
1582 bool src_is_negative = src_val < 0;
1583 if (shift_val > 63) {
1584 dst.SetInt(vform, i, 0);
1585 } else if (shift_val < -63) {
1586 dst.SetRounding(i, src_is_negative);
1587 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1588 } else {
1589 // Use unsigned types for shifts, as behaviour is undefined for signed
1590 // lhs.
1591 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1592
1593 if (shift_val < 0) {
1594 // Convert to right shift.
1595 shift_val = -shift_val;
1596
1597 // Set rounding state by testing most-significant bit shifted out.
1598 // Rounding only needed on right shifts.
1599 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1600 dst.SetRounding(i, true);
1601 }
1602
1603 usrc_val >>= shift_val;
1604
1605 if (src_is_negative) {
1606 // Simulate sign-extension.
1607 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1608 }
1609 } else {
1610 usrc_val <<= shift_val;
1611 }
1612 dst.SetUint(vform, i, usrc_val);
1613 }
1614 }
1615 return dst;
1616 }
1617
1618 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1619 const LogicVRegister& src1,
1620 const LogicVRegister& src2) {
1621 dst.ClearForWrite(vform);
1622 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1623 int8_t shift_val = src2.Int(vform, i);
1624 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1625
1626 // Set saturation state.
1627 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1628 dst.SetUnsignedSat(i, true);
1629 }
1630
1631 uint64_t src_val = src1.Uint(vform, i);
1632 if ((shift_val > 63) || (shift_val < -64)) {
1633 dst.SetUint(vform, i, 0);
1634 } else {
1635 if (shift_val < 0) {
1636 // Set rounding state. Rounding only needed on right shifts.
1637 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1638 dst.SetRounding(i, true);
1639 }
1640
1641 if (shift_val == -64) {
1642 src_val = 0;
1643 } else {
1644 src_val >>= -shift_val;
1645 }
1646 } else {
1647 src_val <<= shift_val;
1648 }
1649 dst.SetUint(vform, i, src_val);
1650 }
1651 }
1652 return dst;
1653 }
1654
1655 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1656 const LogicVRegister& src) {
1657 dst.ClearForWrite(vform);
1658 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1659 // Test for signed saturation.
1660 int64_t sa = src.Int(vform, i);
1661 if (sa == MinIntFromFormat(vform)) {
1662 dst.SetSignedSat(i, true);
1663 }
1664 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1665 }
1666 return dst;
1667 }
1668
1669 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1670 const LogicVRegister& src) {
1671 dst.ClearForWrite(vform);
1672 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1673 int64_t sa = dst.IntLeftJustified(vform, i);
1674 uint64_t ub = src.UintLeftJustified(vform, i);
1675 uint64_t ur = sa + ub;
1676
1677 int64_t sr = bit_cast<int64_t>(ur);
1678 if (sr < sa) { // Test for signed positive saturation.
1679 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1680 } else {
1681 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1682 }
1683 }
1684 return dst;
1685 }
1686
1687 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1688 const LogicVRegister& src) {
1689 dst.ClearForWrite(vform);
1690 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1691 uint64_t ua = dst.UintLeftJustified(vform, i);
1692 int64_t sb = src.IntLeftJustified(vform, i);
1693 uint64_t ur = ua + sb;
1694
1695 if ((sb > 0) && (ur <= ua)) {
1696 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1697 } else if ((sb < 0) && (ur >= ua)) {
1698 dst.SetUint(vform, i, 0); // Negative saturation.
1699 } else {
1700 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1701 }
1702 }
1703 return dst;
1704 }
1705
1706 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1707 const LogicVRegister& src) {
1708 dst.ClearForWrite(vform);
1709 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1710 // Test for signed saturation.
1711 int64_t sa = src.Int(vform, i);
1712 if (sa == MinIntFromFormat(vform)) {
1713 dst.SetSignedSat(i, true);
1714 }
1715 if (sa < 0) {
1716 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1717 } else {
1718 dst.SetInt(vform, i, sa);
1719 }
1720 }
1721 return dst;
1722 }
1723
1724 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1725 LogicVRegister dst, bool dstIsSigned,
1726 const LogicVRegister& src,
1727 bool srcIsSigned) {
1728 bool upperhalf = false;
1729 VectorFormat srcform = kFormatUndefined;
1730 int64_t ssrc[8];
1731 uint64_t usrc[8];
1732
1733 switch (dstform) {
1734 case kFormat8B:
1735 upperhalf = false;
1736 srcform = kFormat8H;
1737 break;
1738 case kFormat16B:
1739 upperhalf = true;
1740 srcform = kFormat8H;
1741 break;
1742 case kFormat4H:
1743 upperhalf = false;
1744 srcform = kFormat4S;
1745 break;
1746 case kFormat8H:
1747 upperhalf = true;
1748 srcform = kFormat4S;
1749 break;
1750 case kFormat2S:
1751 upperhalf = false;
1752 srcform = kFormat2D;
1753 break;
1754 case kFormat4S:
1755 upperhalf = true;
1756 srcform = kFormat2D;
1757 break;
1758 case kFormatB:
1759 upperhalf = false;
1760 srcform = kFormatH;
1761 break;
1762 case kFormatH:
1763 upperhalf = false;
1764 srcform = kFormatS;
1765 break;
1766 case kFormatS:
1767 upperhalf = false;
1768 srcform = kFormatD;
1769 break;
1770 default:
1771 UNIMPLEMENTED();
1772 }
1773
1774 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1775 ssrc[i] = src.Int(srcform, i);
1776 usrc[i] = src.Uint(srcform, i);
1777 }
1778
1779 int offset;
1780 if (upperhalf) {
1781 offset = LaneCountFromFormat(dstform) / 2;
1782 } else {
1783 offset = 0;
1784 dst.ClearForWrite(dstform);
1785 }
1786
1787 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1788 // Test for signed saturation
1789 if (ssrc[i] > MaxIntFromFormat(dstform)) {
1790 dst.SetSignedSat(offset + i, true);
1791 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1792 dst.SetSignedSat(offset + i, false);
1793 }
1794
1795 // Test for unsigned saturation
1796 if (srcIsSigned) {
1797 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1798 dst.SetUnsignedSat(offset + i, true);
1799 } else if (ssrc[i] < 0) {
1800 dst.SetUnsignedSat(offset + i, false);
1801 }
1802 } else {
1803 if (usrc[i] > MaxUintFromFormat(dstform)) {
1804 dst.SetUnsignedSat(offset + i, true);
1805 }
1806 }
1807
1808 int64_t result;
1809 if (srcIsSigned) {
1810 result = ssrc[i] & MaxUintFromFormat(dstform);
1811 } else {
1812 result = usrc[i] & MaxUintFromFormat(dstform);
1813 }
1814
1815 if (dstIsSigned) {
1816 dst.SetInt(dstform, offset + i, result);
1817 } else {
1818 dst.SetUint(dstform, offset + i, result);
1819 }
1820 }
1821 return dst;
1822 }
1823
1824 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1825 const LogicVRegister& src) {
1826 return ExtractNarrow(vform, dst, true, src, true);
1827 }
1828
1829 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1830 const LogicVRegister& src) {
1831 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1832 }
1833
1834 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1835 const LogicVRegister& src) {
1836 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1837 }
1838
1839 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1840 const LogicVRegister& src) {
1841 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1842 }
1843
1844 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1845 const LogicVRegister& src1,
1846 const LogicVRegister& src2, bool issigned) {
1847 dst.ClearForWrite(vform);
1848 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1849 if (issigned) {
1850 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1851 sr = sr > 0 ? sr : -sr;
1852 dst.SetInt(vform, i, sr);
1853 } else {
1854 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1855 sr = sr > 0 ? sr : -sr;
1856 dst.SetUint(vform, i, sr);
1857 }
1858 }
1859 return dst;
1860 }
1861
1862 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1863 const LogicVRegister& src1,
1864 const LogicVRegister& src2) {
1865 SimVRegister temp;
1866 dst.ClearForWrite(vform);
1867 AbsDiff(vform, temp, src1, src2, true);
1868 add(vform, dst, dst, temp);
1869 return dst;
1870 }
1871
1872 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1873 const LogicVRegister& src1,
1874 const LogicVRegister& src2) {
1875 SimVRegister temp;
1876 dst.ClearForWrite(vform);
1877 AbsDiff(vform, temp, src1, src2, false);
1878 add(vform, dst, dst, temp);
1879 return dst;
1880 }
1881
1882 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1883 const LogicVRegister& src) {
1884 dst.ClearForWrite(vform);
1885 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1886 dst.SetUint(vform, i, ~src.Uint(vform, i));
1887 }
1888 return dst;
1889 }
1890
1891 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1892 const LogicVRegister& src) {
1893 uint64_t result[16];
1894 int laneCount = LaneCountFromFormat(vform);
1895 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1896 uint64_t reversed_value;
1897 uint64_t value;
1898 for (int i = 0; i < laneCount; i++) {
1899 value = src.Uint(vform, i);
1900 reversed_value = 0;
1901 for (int j = 0; j < laneSizeInBits; j++) {
1902 reversed_value = (reversed_value << 1) | (value & 1);
1903 value >>= 1;
1904 }
1905 result[i] = reversed_value;
1906 }
1907
1908 dst.SetUintArray(vform, result);
1909 return dst;
1910 }
1911
1912 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1913 const LogicVRegister& src, int revSize) {
1914 uint64_t result[16];
1915 int laneCount = LaneCountFromFormat(vform);
1916 int laneSize = LaneSizeInBytesFromFormat(vform);
1917 int lanesPerLoop = revSize / laneSize;
1918 for (int i = 0; i < laneCount; i += lanesPerLoop) {
1919 for (int j = 0; j < lanesPerLoop; j++) {
1920 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1921 }
1922 }
1923 dst.SetUintArray(vform, result);
1924 return dst;
1925 }
1926
1927 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1928 const LogicVRegister& src) {
1929 return rev(vform, dst, src, 2);
1930 }
1931
1932 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1933 const LogicVRegister& src) {
1934 return rev(vform, dst, src, 4);
1935 }
1936
1937 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1938 const LogicVRegister& src) {
1939 return rev(vform, dst, src, 8);
1940 }
1941
1942 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1943 const LogicVRegister& src, bool is_signed,
1944 bool do_accumulate) {
1945 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1946 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1947 DCHECK_LE(LaneCountFromFormat(vform), 8);
1948
1949 uint64_t result[8];
1950 int lane_count = LaneCountFromFormat(vform);
1951 for (int i = 0; i < lane_count; i++) {
1952 if (is_signed) {
1953 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1954 src.Int(vformsrc, 2 * i + 1));
1955 } else {
1956 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1957 }
1958 }
1959
1960 dst.ClearForWrite(vform);
1961 for (int i = 0; i < lane_count; ++i) {
1962 if (do_accumulate) {
1963 result[i] += dst.Uint(vform, i);
1964 }
1965 dst.SetUint(vform, i, result[i]);
1966 }
1967
1968 return dst;
1969 }
1970
1971 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1972 const LogicVRegister& src) {
1973 return addlp(vform, dst, src, true, false);
1974 }
1975
1976 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1977 const LogicVRegister& src) {
1978 return addlp(vform, dst, src, false, false);
1979 }
1980
1981 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1982 const LogicVRegister& src) {
1983 return addlp(vform, dst, src, true, true);
1984 }
1985
1986 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1987 const LogicVRegister& src) {
1988 return addlp(vform, dst, src, false, true);
1989 }
1990
1991 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1992 const LogicVRegister& src1,
1993 const LogicVRegister& src2, int index) {
1994 uint8_t result[16];
1995 int laneCount = LaneCountFromFormat(vform);
1996 for (int i = 0; i < laneCount - index; ++i) {
1997 result[i] = src1.Uint(vform, i + index);
1998 }
1999 for (int i = 0; i < index; ++i) {
2000 result[laneCount - index + i] = src2.Uint(vform, i);
2001 }
2002 dst.ClearForWrite(vform);
2003 for (int i = 0; i < laneCount; ++i) {
2004 dst.SetUint(vform, i, result[i]);
2005 }
2006 return dst;
2007 }
2008
2009 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
2010 const LogicVRegister& src,
2011 int src_index) {
2012 int laneCount = LaneCountFromFormat(vform);
2013 uint64_t value = src.Uint(vform, src_index);
2014 dst.ClearForWrite(vform);
2015 for (int i = 0; i < laneCount; ++i) {
2016 dst.SetUint(vform, i, value);
2017 }
2018 return dst;
2019 }
2020
2021 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2022 uint64_t imm) {
2023 int laneCount = LaneCountFromFormat(vform);
2024 uint64_t value = imm & MaxUintFromFormat(vform);
2025 dst.ClearForWrite(vform);
2026 for (int i = 0; i < laneCount; ++i) {
2027 dst.SetUint(vform, i, value);
2028 }
2029 return dst;
2030 }
2031
2032 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2033 int dst_index, const LogicVRegister& src,
2034 int src_index) {
2035 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2036 return dst;
2037 }
2038
2039 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2040 int dst_index, uint64_t imm) {
2041 uint64_t value = imm & MaxUintFromFormat(vform);
2042 dst.SetUint(vform, dst_index, value);
2043 return dst;
2044 }
2045
2046 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2047 uint64_t imm) {
2048 int laneCount = LaneCountFromFormat(vform);
2049 dst.ClearForWrite(vform);
2050 for (int i = 0; i < laneCount; ++i) {
2051 dst.SetUint(vform, i, imm);
2052 }
2053 return dst;
2054 }
2055
2056 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2057 uint64_t imm) {
2058 int laneCount = LaneCountFromFormat(vform);
2059 dst.ClearForWrite(vform);
2060 for (int i = 0; i < laneCount; ++i) {
2061 dst.SetUint(vform, i, ~imm);
2062 }
2063 return dst;
2064 }
2065
2066 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2067 const LogicVRegister& src, uint64_t imm) {
2068 uint64_t result[16];
2069 int laneCount = LaneCountFromFormat(vform);
2070 for (int i = 0; i < laneCount; ++i) {
2071 result[i] = src.Uint(vform, i) | imm;
2072 }
2073 dst.SetUintArray(vform, result);
2074 return dst;
2075 }
2076
2077 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2078 const LogicVRegister& src) {
2079 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2080
2081 dst.ClearForWrite(vform);
2082 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2083 dst.SetUint(vform, i, src.Uint(vform_half, i));
2084 }
2085 return dst;
2086 }
2087
2088 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2089 const LogicVRegister& src) {
2090 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2091
2092 dst.ClearForWrite(vform);
2093 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2094 dst.SetInt(vform, i, src.Int(vform_half, i));
2095 }
2096 return dst;
2097 }
2098
2099 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2100 const LogicVRegister& src) {
2101 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2102 int lane_count = LaneCountFromFormat(vform);
2103
2104 dst.ClearForWrite(vform);
2105 for (int i = 0; i < lane_count; i++) {
2106 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2107 }
2108 return dst;
2109 }
2110
2111 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2112 const LogicVRegister& src) {
2113 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2114 int lane_count = LaneCountFromFormat(vform);
2115
2116 dst.ClearForWrite(vform);
2117 for (int i = 0; i < lane_count; i++) {
2118 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2119 }
2120 return dst;
2121 }
2122
2123 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2124 const LogicVRegister& src, int shift) {
2125 SimVRegister temp;
2126 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2127 VectorFormat vform_dst = vform;
2128 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2129 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2130 }
2131
2132 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2133 const LogicVRegister& src, int shift) {
2134 SimVRegister temp;
2135 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2136 VectorFormat vformdst = vform;
2137 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2138 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2139 }
2140
2141 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2142 const LogicVRegister& src, int shift) {
2143 SimVRegister temp;
2144 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2145 VectorFormat vformdst = vform;
2146 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2147 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2148 }
2149
2150 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2151 const LogicVRegister& src, int shift) {
2152 SimVRegister temp;
2153 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2154 VectorFormat vformdst = vform;
2155 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2156 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2157 }
2158
2159 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2160 const LogicVRegister& ind,
2161 bool zero_out_of_bounds,
2162 const LogicVRegister* tab1,
2163 const LogicVRegister* tab2,
2164 const LogicVRegister* tab3,
2165 const LogicVRegister* tab4) {
2166 DCHECK_NOT_NULL(tab1);
2167 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2168 uint64_t result[kMaxLanesPerVector];
2169 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2170 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2171 }
2172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2173 uint64_t j = ind.Uint(vform, i);
2174 int tab_idx = static_cast<int>(j >> 4);
2175 int j_idx = static_cast<int>(j & 15);
2176 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2177 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2178 }
2179 }
2180 dst.SetUintArray(vform, result);
2181 return dst;
2182 }
2183
2184 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2185 const LogicVRegister& tab,
2186 const LogicVRegister& ind) {
2187 return Table(vform, dst, ind, true, &tab);
2188 }
2189
2190 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2191 const LogicVRegister& tab,
2192 const LogicVRegister& tab2,
2193 const LogicVRegister& ind) {
2194 return Table(vform, dst, ind, true, &tab, &tab2);
2195 }
2196
2197 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2198 const LogicVRegister& tab,
2199 const LogicVRegister& tab2,
2200 const LogicVRegister& tab3,
2201 const LogicVRegister& ind) {
2202 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2203 }
2204
2205 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2206 const LogicVRegister& tab,
2207 const LogicVRegister& tab2,
2208 const LogicVRegister& tab3,
2209 const LogicVRegister& tab4,
2210 const LogicVRegister& ind) {
2211 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2212 }
2213
2214 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2215 const LogicVRegister& tab,
2216 const LogicVRegister& ind) {
2217 return Table(vform, dst, ind, false, &tab);
2218 }
2219
2220 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2221 const LogicVRegister& tab,
2222 const LogicVRegister& tab2,
2223 const LogicVRegister& ind) {
2224 return Table(vform, dst, ind, false, &tab, &tab2);
2225 }
2226
2227 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2228 const LogicVRegister& tab,
2229 const LogicVRegister& tab2,
2230 const LogicVRegister& tab3,
2231 const LogicVRegister& ind) {
2232 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2233 }
2234
2235 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2236 const LogicVRegister& tab,
2237 const LogicVRegister& tab2,
2238 const LogicVRegister& tab3,
2239 const LogicVRegister& tab4,
2240 const LogicVRegister& ind) {
2241 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2242 }
2243
2244 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2245 const LogicVRegister& src, int shift) {
2246 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2247 }
2248
2249 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2250 const LogicVRegister& src, int shift) {
2251 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2252 }
2253
2254 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2255 const LogicVRegister& src, int shift) {
2256 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2257 }
2258
2259 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2260 const LogicVRegister& src, int shift) {
2261 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2262 }
2263
2264 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2265 const LogicVRegister& src, int shift) {
2266 SimVRegister temp;
2267 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2268 VectorFormat vformdst = vform;
2269 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2270 return sqxtn(vformdst, dst, shifted_src);
2271 }
2272
2273 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2274 const LogicVRegister& src, int shift) {
2275 SimVRegister temp;
2276 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2277 VectorFormat vformdst = vform;
2278 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2279 return sqxtn(vformdst, dst, shifted_src);
2280 }
2281
2282 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2283 const LogicVRegister& src, int shift) {
2284 SimVRegister temp;
2285 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2286 VectorFormat vformdst = vform;
2287 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2288 return sqxtn(vformdst, dst, shifted_src);
2289 }
2290
2291 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2292 const LogicVRegister& src, int shift) {
2293 SimVRegister temp;
2294 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2295 VectorFormat vformdst = vform;
2296 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2297 return sqxtn(vformdst, dst, shifted_src);
2298 }
2299
2300 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2301 const LogicVRegister& src, int shift) {
2302 SimVRegister temp;
2303 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2304 VectorFormat vformdst = vform;
2305 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2306 return sqxtun(vformdst, dst, shifted_src);
2307 }
2308
2309 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2310 const LogicVRegister& src, int shift) {
2311 SimVRegister temp;
2312 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2313 VectorFormat vformdst = vform;
2314 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2315 return sqxtun(vformdst, dst, shifted_src);
2316 }
2317
2318 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2319 const LogicVRegister& src, int shift) {
2320 SimVRegister temp;
2321 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2322 VectorFormat vformdst = vform;
2323 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2324 return sqxtun(vformdst, dst, shifted_src);
2325 }
2326
2327 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2328 const LogicVRegister& src, int shift) {
2329 SimVRegister temp;
2330 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2331 VectorFormat vformdst = vform;
2332 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2333 return sqxtun(vformdst, dst, shifted_src);
2334 }
2335
2336 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2337 const LogicVRegister& src1,
2338 const LogicVRegister& src2) {
2339 SimVRegister temp1, temp2;
2340 uxtl(vform, temp1, src1);
2341 uxtl(vform, temp2, src2);
2342 add(vform, dst, temp1, temp2);
2343 return dst;
2344 }
2345
2346 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2347 const LogicVRegister& src1,
2348 const LogicVRegister& src2) {
2349 SimVRegister temp1, temp2;
2350 uxtl2(vform, temp1, src1);
2351 uxtl2(vform, temp2, src2);
2352 add(vform, dst, temp1, temp2);
2353 return dst;
2354 }
2355
2356 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2357 const LogicVRegister& src1,
2358 const LogicVRegister& src2) {
2359 SimVRegister temp;
2360 uxtl(vform, temp, src2);
2361 add(vform, dst, src1, temp);
2362 return dst;
2363 }
2364
2365 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2366 const LogicVRegister& src1,
2367 const LogicVRegister& src2) {
2368 SimVRegister temp;
2369 uxtl2(vform, temp, src2);
2370 add(vform, dst, src1, temp);
2371 return dst;
2372 }
2373
2374 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2375 const LogicVRegister& src1,
2376 const LogicVRegister& src2) {
2377 SimVRegister temp1, temp2;
2378 sxtl(vform, temp1, src1);
2379 sxtl(vform, temp2, src2);
2380 add(vform, dst, temp1, temp2);
2381 return dst;
2382 }
2383
2384 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2385 const LogicVRegister& src1,
2386 const LogicVRegister& src2) {
2387 SimVRegister temp1, temp2;
2388 sxtl2(vform, temp1, src1);
2389 sxtl2(vform, temp2, src2);
2390 add(vform, dst, temp1, temp2);
2391 return dst;
2392 }
2393
2394 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2395 const LogicVRegister& src1,
2396 const LogicVRegister& src2) {
2397 SimVRegister temp;
2398 sxtl(vform, temp, src2);
2399 add(vform, dst, src1, temp);
2400 return dst;
2401 }
2402
2403 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2404 const LogicVRegister& src1,
2405 const LogicVRegister& src2) {
2406 SimVRegister temp;
2407 sxtl2(vform, temp, src2);
2408 add(vform, dst, src1, temp);
2409 return dst;
2410 }
2411
2412 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2413 const LogicVRegister& src1,
2414 const LogicVRegister& src2) {
2415 SimVRegister temp1, temp2;
2416 uxtl(vform, temp1, src1);
2417 uxtl(vform, temp2, src2);
2418 sub(vform, dst, temp1, temp2);
2419 return dst;
2420 }
2421
2422 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2423 const LogicVRegister& src1,
2424 const LogicVRegister& src2) {
2425 SimVRegister temp1, temp2;
2426 uxtl2(vform, temp1, src1);
2427 uxtl2(vform, temp2, src2);
2428 sub(vform, dst, temp1, temp2);
2429 return dst;
2430 }
2431
2432 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2433 const LogicVRegister& src1,
2434 const LogicVRegister& src2) {
2435 SimVRegister temp;
2436 uxtl(vform, temp, src2);
2437 sub(vform, dst, src1, temp);
2438 return dst;
2439 }
2440
2441 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2442 const LogicVRegister& src1,
2443 const LogicVRegister& src2) {
2444 SimVRegister temp;
2445 uxtl2(vform, temp, src2);
2446 sub(vform, dst, src1, temp);
2447 return dst;
2448 }
2449
2450 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2451 const LogicVRegister& src1,
2452 const LogicVRegister& src2) {
2453 SimVRegister temp1, temp2;
2454 sxtl(vform, temp1, src1);
2455 sxtl(vform, temp2, src2);
2456 sub(vform, dst, temp1, temp2);
2457 return dst;
2458 }
2459
2460 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2461 const LogicVRegister& src1,
2462 const LogicVRegister& src2) {
2463 SimVRegister temp1, temp2;
2464 sxtl2(vform, temp1, src1);
2465 sxtl2(vform, temp2, src2);
2466 sub(vform, dst, temp1, temp2);
2467 return dst;
2468 }
2469
2470 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2471 const LogicVRegister& src1,
2472 const LogicVRegister& src2) {
2473 SimVRegister temp;
2474 sxtl(vform, temp, src2);
2475 sub(vform, dst, src1, temp);
2476 return dst;
2477 }
2478
2479 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2480 const LogicVRegister& src1,
2481 const LogicVRegister& src2) {
2482 SimVRegister temp;
2483 sxtl2(vform, temp, src2);
2484 sub(vform, dst, src1, temp);
2485 return dst;
2486 }
2487
2488 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2489 const LogicVRegister& src1,
2490 const LogicVRegister& src2) {
2491 SimVRegister temp1, temp2;
2492 uxtl(vform, temp1, src1);
2493 uxtl(vform, temp2, src2);
2494 uaba(vform, dst, temp1, temp2);
2495 return dst;
2496 }
2497
2498 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2499 const LogicVRegister& src1,
2500 const LogicVRegister& src2) {
2501 SimVRegister temp1, temp2;
2502 uxtl2(vform, temp1, src1);
2503 uxtl2(vform, temp2, src2);
2504 uaba(vform, dst, temp1, temp2);
2505 return dst;
2506 }
2507
2508 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2509 const LogicVRegister& src1,
2510 const LogicVRegister& src2) {
2511 SimVRegister temp1, temp2;
2512 sxtl(vform, temp1, src1);
2513 sxtl(vform, temp2, src2);
2514 saba(vform, dst, temp1, temp2);
2515 return dst;
2516 }
2517
2518 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2519 const LogicVRegister& src1,
2520 const LogicVRegister& src2) {
2521 SimVRegister temp1, temp2;
2522 sxtl2(vform, temp1, src1);
2523 sxtl2(vform, temp2, src2);
2524 saba(vform, dst, temp1, temp2);
2525 return dst;
2526 }
2527
2528 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2529 const LogicVRegister& src1,
2530 const LogicVRegister& src2) {
2531 SimVRegister temp1, temp2;
2532 uxtl(vform, temp1, src1);
2533 uxtl(vform, temp2, src2);
2534 AbsDiff(vform, dst, temp1, temp2, false);
2535 return dst;
2536 }
2537
2538 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2539 const LogicVRegister& src1,
2540 const LogicVRegister& src2) {
2541 SimVRegister temp1, temp2;
2542 uxtl2(vform, temp1, src1);
2543 uxtl2(vform, temp2, src2);
2544 AbsDiff(vform, dst, temp1, temp2, false);
2545 return dst;
2546 }
2547
2548 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2549 const LogicVRegister& src1,
2550 const LogicVRegister& src2) {
2551 SimVRegister temp1, temp2;
2552 sxtl(vform, temp1, src1);
2553 sxtl(vform, temp2, src2);
2554 AbsDiff(vform, dst, temp1, temp2, true);
2555 return dst;
2556 }
2557
2558 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2559 const LogicVRegister& src1,
2560 const LogicVRegister& src2) {
2561 SimVRegister temp1, temp2;
2562 sxtl2(vform, temp1, src1);
2563 sxtl2(vform, temp2, src2);
2564 AbsDiff(vform, dst, temp1, temp2, true);
2565 return dst;
2566 }
2567
2568 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2569 const LogicVRegister& src1,
2570 const LogicVRegister& src2) {
2571 SimVRegister temp1, temp2;
2572 uxtl(vform, temp1, src1);
2573 uxtl(vform, temp2, src2);
2574 mul(vform, dst, temp1, temp2);
2575 return dst;
2576 }
2577
2578 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2579 const LogicVRegister& src1,
2580 const LogicVRegister& src2) {
2581 SimVRegister temp1, temp2;
2582 uxtl2(vform, temp1, src1);
2583 uxtl2(vform, temp2, src2);
2584 mul(vform, dst, temp1, temp2);
2585 return dst;
2586 }
2587
2588 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2589 const LogicVRegister& src1,
2590 const LogicVRegister& src2) {
2591 SimVRegister temp1, temp2;
2592 sxtl(vform, temp1, src1);
2593 sxtl(vform, temp2, src2);
2594 mul(vform, dst, temp1, temp2);
2595 return dst;
2596 }
2597
2598 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2599 const LogicVRegister& src1,
2600 const LogicVRegister& src2) {
2601 SimVRegister temp1, temp2;
2602 sxtl2(vform, temp1, src1);
2603 sxtl2(vform, temp2, src2);
2604 mul(vform, dst, temp1, temp2);
2605 return dst;
2606 }
2607
2608 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2609 const LogicVRegister& src1,
2610 const LogicVRegister& src2) {
2611 SimVRegister temp1, temp2;
2612 uxtl(vform, temp1, src1);
2613 uxtl(vform, temp2, src2);
2614 mls(vform, dst, temp1, temp2);
2615 return dst;
2616 }
2617
2618 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2619 const LogicVRegister& src1,
2620 const LogicVRegister& src2) {
2621 SimVRegister temp1, temp2;
2622 uxtl2(vform, temp1, src1);
2623 uxtl2(vform, temp2, src2);
2624 mls(vform, dst, temp1, temp2);
2625 return dst;
2626 }
2627
2628 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2629 const LogicVRegister& src1,
2630 const LogicVRegister& src2) {
2631 SimVRegister temp1, temp2;
2632 sxtl(vform, temp1, src1);
2633 sxtl(vform, temp2, src2);
2634 mls(vform, dst, temp1, temp2);
2635 return dst;
2636 }
2637
2638 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2639 const LogicVRegister& src1,
2640 const LogicVRegister& src2) {
2641 SimVRegister temp1, temp2;
2642 sxtl2(vform, temp1, src1);
2643 sxtl2(vform, temp2, src2);
2644 mls(vform, dst, temp1, temp2);
2645 return dst;
2646 }
2647
2648 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2649 const LogicVRegister& src1,
2650 const LogicVRegister& src2) {
2651 SimVRegister temp1, temp2;
2652 uxtl(vform, temp1, src1);
2653 uxtl(vform, temp2, src2);
2654 mla(vform, dst, temp1, temp2);
2655 return dst;
2656 }
2657
2658 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2659 const LogicVRegister& src1,
2660 const LogicVRegister& src2) {
2661 SimVRegister temp1, temp2;
2662 uxtl2(vform, temp1, src1);
2663 uxtl2(vform, temp2, src2);
2664 mla(vform, dst, temp1, temp2);
2665 return dst;
2666 }
2667
2668 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2669 const LogicVRegister& src1,
2670 const LogicVRegister& src2) {
2671 SimVRegister temp1, temp2;
2672 sxtl(vform, temp1, src1);
2673 sxtl(vform, temp2, src2);
2674 mla(vform, dst, temp1, temp2);
2675 return dst;
2676 }
2677
2678 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2679 const LogicVRegister& src1,
2680 const LogicVRegister& src2) {
2681 SimVRegister temp1, temp2;
2682 sxtl2(vform, temp1, src1);
2683 sxtl2(vform, temp2, src2);
2684 mla(vform, dst, temp1, temp2);
2685 return dst;
2686 }
2687
2688 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2689 const LogicVRegister& src1,
2690 const LogicVRegister& src2) {
2691 SimVRegister temp;
2692 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2693 return add(vform, dst, dst, product).SignedSaturate(vform);
2694 }
2695
2696 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2697 const LogicVRegister& src1,
2698 const LogicVRegister& src2) {
2699 SimVRegister temp;
2700 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2701 return add(vform, dst, dst, product).SignedSaturate(vform);
2702 }
2703
2704 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2705 const LogicVRegister& src1,
2706 const LogicVRegister& src2) {
2707 SimVRegister temp;
2708 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2709 return sub(vform, dst, dst, product).SignedSaturate(vform);
2710 }
2711
2712 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2713 const LogicVRegister& src1,
2714 const LogicVRegister& src2) {
2715 SimVRegister temp;
2716 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2717 return sub(vform, dst, dst, product).SignedSaturate(vform);
2718 }
2719
2720 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2721 const LogicVRegister& src1,
2722 const LogicVRegister& src2) {
2723 SimVRegister temp;
2724 LogicVRegister product = smull(vform, temp, src1, src2);
2725 return add(vform, dst, product, product).SignedSaturate(vform);
2726 }
2727
2728 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2729 const LogicVRegister& src1,
2730 const LogicVRegister& src2) {
2731 SimVRegister temp;
2732 LogicVRegister product = smull2(vform, temp, src1, src2);
2733 return add(vform, dst, product, product).SignedSaturate(vform);
2734 }
2735
2736 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2737 const LogicVRegister& src1,
2738 const LogicVRegister& src2, bool round) {
2739 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2740 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2741 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2742
2743 int esize = LaneSizeInBitsFromFormat(vform);
2744 int round_const = round ? (1 << (esize - 2)) : 0;
2745 int64_t product;
2746
2747 dst.ClearForWrite(vform);
2748 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2749 product = src1.Int(vform, i) * src2.Int(vform, i);
2750 product += round_const;
2751 product = product >> (esize - 1);
2752
2753 if (product > MaxIntFromFormat(vform)) {
2754 product = MaxIntFromFormat(vform);
2755 } else if (product < MinIntFromFormat(vform)) {
2756 product = MinIntFromFormat(vform);
2757 }
2758 dst.SetInt(vform, i, product);
2759 }
2760 return dst;
2761 }
2762
2763 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2764 const LogicVRegister& src1,
2765 const LogicVRegister& src2) {
2766 return sqrdmulh(vform, dst, src1, src2, false);
2767 }
2768
2769 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2770 const LogicVRegister& src1,
2771 const LogicVRegister& src2) {
2772 SimVRegister temp;
2773 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2774 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2775 return dst;
2776 }
2777
2778 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2779 const LogicVRegister& src1,
2780 const LogicVRegister& src2) {
2781 SimVRegister temp;
2782 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2783 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2784 return dst;
2785 }
2786
2787 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2788 const LogicVRegister& src1,
2789 const LogicVRegister& src2) {
2790 SimVRegister temp;
2791 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2792 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2793 return dst;
2794 }
2795
2796 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2797 const LogicVRegister& src1,
2798 const LogicVRegister& src2) {
2799 SimVRegister temp;
2800 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2801 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2802 return dst;
2803 }
2804
2805 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2806 const LogicVRegister& src1,
2807 const LogicVRegister& src2) {
2808 SimVRegister temp;
2809 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2810 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2811 return dst;
2812 }
2813
2814 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2815 const LogicVRegister& src1,
2816 const LogicVRegister& src2) {
2817 SimVRegister temp;
2818 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2819 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2820 return dst;
2821 }
2822
2823 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2824 const LogicVRegister& src1,
2825 const LogicVRegister& src2) {
2826 SimVRegister temp;
2827 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2828 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2829 return dst;
2830 }
2831
2832 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2833 const LogicVRegister& src1,
2834 const LogicVRegister& src2) {
2835 SimVRegister temp;
2836 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2837 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2838 return dst;
2839 }
2840
2841 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2842 const LogicVRegister& src1,
2843 const LogicVRegister& src2) {
2844 uint64_t result[16];
2845 int laneCount = LaneCountFromFormat(vform);
2846 int pairs = laneCount / 2;
2847 for (int i = 0; i < pairs; ++i) {
2848 result[2 * i] = src1.Uint(vform, 2 * i);
2849 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2850 }
2851
2852 dst.SetUintArray(vform, result);
2853 return dst;
2854 }
2855
2856 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2857 const LogicVRegister& src1,
2858 const LogicVRegister& src2) {
2859 uint64_t result[16];
2860 int laneCount = LaneCountFromFormat(vform);
2861 int pairs = laneCount / 2;
2862 for (int i = 0; i < pairs; ++i) {
2863 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2864 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2865 }
2866
2867 dst.SetUintArray(vform, result);
2868 return dst;
2869 }
2870
2871 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2872 const LogicVRegister& src1,
2873 const LogicVRegister& src2) {
2874 uint64_t result[16];
2875 int laneCount = LaneCountFromFormat(vform);
2876 int pairs = laneCount / 2;
2877 for (int i = 0; i < pairs; ++i) {
2878 result[2 * i] = src1.Uint(vform, i);
2879 result[(2 * i) + 1] = src2.Uint(vform, i);
2880 }
2881
2882 dst.SetUintArray(vform, result);
2883 return dst;
2884 }
2885
2886 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2887 const LogicVRegister& src1,
2888 const LogicVRegister& src2) {
2889 uint64_t result[16];
2890 int laneCount = LaneCountFromFormat(vform);
2891 int pairs = laneCount / 2;
2892 for (int i = 0; i < pairs; ++i) {
2893 result[2 * i] = src1.Uint(vform, pairs + i);
2894 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2895 }
2896
2897 dst.SetUintArray(vform, result);
2898 return dst;
2899 }
2900
2901 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2902 const LogicVRegister& src1,
2903 const LogicVRegister& src2) {
2904 uint64_t result[32];
2905 int laneCount = LaneCountFromFormat(vform);
2906 for (int i = 0; i < laneCount; ++i) {
2907 result[i] = src1.Uint(vform, i);
2908 result[laneCount + i] = src2.Uint(vform, i);
2909 }
2910
2911 dst.ClearForWrite(vform);
2912 for (int i = 0; i < laneCount; ++i) {
2913 dst.SetUint(vform, i, result[2 * i]);
2914 }
2915 return dst;
2916 }
2917
2918 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2919 const LogicVRegister& src1,
2920 const LogicVRegister& src2) {
2921 uint64_t result[32];
2922 int laneCount = LaneCountFromFormat(vform);
2923 for (int i = 0; i < laneCount; ++i) {
2924 result[i] = src1.Uint(vform, i);
2925 result[laneCount + i] = src2.Uint(vform, i);
2926 }
2927
2928 dst.ClearForWrite(vform);
2929 for (int i = 0; i < laneCount; ++i) {
2930 dst.SetUint(vform, i, result[(2 * i) + 1]);
2931 }
2932 return dst;
2933 }
2934
2935 template <typename T>
2936 T Simulator::FPAdd(T op1, T op2) {
2937 T result = FPProcessNaNs(op1, op2);
2938 if (std::isnan(result)) return result;
2939
2940 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2941 // inf + -inf returns the default NaN.
2942 FPProcessException();
2943 return FPDefaultNaN<T>();
2944 } else {
2945 // Other cases should be handled by standard arithmetic.
2946 return op1 + op2;
2947 }
2948 }
2949
2950 template <typename T>
2951 T Simulator::FPSub(T op1, T op2) {
2952 // NaNs should be handled elsewhere.
2953 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2954
2955 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2956 // inf - inf returns the default NaN.
2957 FPProcessException();
2958 return FPDefaultNaN<T>();
2959 } else {
2960 // Other cases should be handled by standard arithmetic.
2961 return op1 - op2;
2962 }
2963 }
2964
2965 template <typename T>
2966 T Simulator::FPMul(T op1, T op2) {
2967 // NaNs should be handled elsewhere.
2968 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2969
2970 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2971 // inf * 0.0 returns the default NaN.
2972 FPProcessException();
2973 return FPDefaultNaN<T>();
2974 } else {
2975 // Other cases should be handled by standard arithmetic.
2976 return op1 * op2;
2977 }
2978 }
2979
2980 template <typename T>
2981 T Simulator::FPMulx(T op1, T op2) {
2982 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2983 // inf * 0.0 returns +/-2.0.
2984 T two = 2.0;
2985 return copysign(1.0, op1) * copysign(1.0, op2) * two;
2986 }
2987 return FPMul(op1, op2);
2988 }
2989
2990 template <typename T>
2991 T Simulator::FPMulAdd(T a, T op1, T op2) {
2992 T result = FPProcessNaNs3(a, op1, op2);
2993
2994 T sign_a = copysign(1.0, a);
2995 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
2996 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2997 bool operation_generates_nan =
2998 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
2999 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
3000 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
3001
3002 if (std::isnan(result)) {
3003 // Generated NaNs override quiet NaNs propagated from a.
3004 if (operation_generates_nan && IsQuietNaN(a)) {
3005 FPProcessException();
3006 return FPDefaultNaN<T>();
3007 } else {
3008 return result;
3009 }
3010 }
3011
3012 // If the operation would produce a NaN, return the default NaN.
3013 if (operation_generates_nan) {
3014 FPProcessException();
3015 return FPDefaultNaN<T>();
3016 }
3017
3018 // Work around broken fma implementations for exact zero results: The sign of
3019 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3020 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3021 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3022 }
3023
3024 result = FusedMultiplyAdd(op1, op2, a);
3025 DCHECK(!std::isnan(result));
3026
3027 // Work around broken fma implementations for rounded zero results: If a is
3028 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3029 if ((a == 0.0) && (result == 0.0)) {
3030 return copysign(0.0, sign_prod);
3031 }
3032
3033 return result;
3034 }
3035
3036 template <typename T>
3037 T Simulator::FPDiv(T op1, T op2) {
3038 // NaNs should be handled elsewhere.
3039 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3040
3041 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3042 // inf / inf and 0.0 / 0.0 return the default NaN.
3043 FPProcessException();
3044 return FPDefaultNaN<T>();
3045 } else {
3046 if (op2 == 0.0) {
3047 FPProcessException();
3048 if (!std::isnan(op1)) {
3049 double op1_sign = copysign(1.0, op1);
3050 double op2_sign = copysign(1.0, op2);
3051 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3052 }
3053 }
3054
3055 // Other cases should be handled by standard arithmetic.
3056 return op1 / op2;
3057 }
3058 }
3059
3060 template <typename T>
3061 T Simulator::FPSqrt(T op) {
3062 if (std::isnan(op)) {
3063 return FPProcessNaN(op);
3064 } else if (op < 0.0) {
3065 FPProcessException();
3066 return FPDefaultNaN<T>();
3067 } else {
3068 return sqrt(op);
3069 }
3070 }
3071
3072 template <typename T>
3073 T Simulator::FPMax(T a, T b) {
3074 T result = FPProcessNaNs(a, b);
3075 if (std::isnan(result)) return result;
3076
3077 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3078 // a and b are zero, and the sign differs: return +0.0.
3079 return 0.0;
3080 } else {
3081 return (a > b) ? a : b;
3082 }
3083 }
3084
3085 template <typename T>
3086 T Simulator::FPMaxNM(T a, T b) {
3087 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3088 a = kFP64NegativeInfinity;
3089 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3090 b = kFP64NegativeInfinity;
3091 }
3092
3093 T result = FPProcessNaNs(a, b);
3094 return std::isnan(result) ? result : FPMax(a, b);
3095 }
3096
3097 template <typename T>
3098 T Simulator::FPMin(T a, T b) {
3099 T result = FPProcessNaNs(a, b);
3100 if (std::isnan(result)) return result;
3101
3102 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3103 // a and b are zero, and the sign differs: return -0.0.
3104 return -0.0;
3105 } else {
3106 return (a < b) ? a : b;
3107 }
3108 }
3109
3110 template <typename T>
3111 T Simulator::FPMinNM(T a, T b) {
3112 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3113 a = kFP64PositiveInfinity;
3114 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3115 b = kFP64PositiveInfinity;
3116 }
3117
3118 T result = FPProcessNaNs(a, b);
3119 return std::isnan(result) ? result : FPMin(a, b);
3120 }
3121
3122 template <typename T>
3123 T Simulator::FPRecipStepFused(T op1, T op2) {
3124 const T two = 2.0;
3125 if ((std::isinf(op1) && (op2 == 0.0)) ||
3126 ((op1 == 0.0) && (std::isinf(op2)))) {
3127 return two;
3128 } else if (std::isinf(op1) || std::isinf(op2)) {
3129 // Return +inf if signs match, otherwise -inf.
3130 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3131 : kFP64NegativeInfinity;
3132 } else {
3133 return FusedMultiplyAdd(op1, op2, two);
3134 }
3135 }
3136
3137 template <typename T>
3138 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3139 const T one_point_five = 1.5;
3140 const T two = 2.0;
3141
3142 if ((std::isinf(op1) && (op2 == 0.0)) ||
3143 ((op1 == 0.0) && (std::isinf(op2)))) {
3144 return one_point_five;
3145 } else if (std::isinf(op1) || std::isinf(op2)) {
3146 // Return +inf if signs match, otherwise -inf.
3147 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3148 : kFP64NegativeInfinity;
3149 } else {
3150 // The multiply-add-halve operation must be fully fused, so avoid interim
3151 // rounding by checking which operand can be losslessly divided by two
3152 // before doing the multiply-add.
3153 if (std::isnormal(op1 / two)) {
3154 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3155 } else if (std::isnormal(op2 / two)) {
3156 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3157 } else {
3158 // Neither operand is normal after halving: the result is dominated by
3159 // the addition term, so just return that.
3160 return one_point_five;
3161 }
3162 }
3163 }
3164
3165 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3166 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3167 (value == kFP64NegativeInfinity)) {
3168 return value;
3169 } else if (std::isnan(value)) {
3170 return FPProcessNaN(value);
3171 }
3172
3173 double int_result = std::floor(value);
3174 double error = value - int_result;
3175 switch (round_mode) {
3176 case FPTieAway: {
3177 // Take care of correctly handling the range ]-0.5, -0.0], which must
3178 // yield -0.0.
3179 if ((-0.5 < value) && (value < 0.0)) {
3180 int_result = -0.0;
3181
3182 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3183 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3184 // result is positive, round up.
3185 int_result++;
3186 }
3187 break;
3188 }
3189 case FPTieEven: {
3190 // Take care of correctly handling the range [-0.5, -0.0], which must
3191 // yield -0.0.
3192 if ((-0.5 <= value) && (value < 0.0)) {
3193 int_result = -0.0;
3194
3195 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3196 // result is odd, round up.
3197 } else if ((error > 0.5) ||
3198 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3199 int_result++;
3200 }
3201 break;
3202 }
3203 case FPZero: {
3204 // If value>0 then we take floor(value)
3205 // otherwise, ceil(value).
3206 if (value < 0) {
3207 int_result = ceil(value);
3208 }
3209 break;
3210 }
3211 case FPNegativeInfinity: {
3212 // We always use floor(value).
3213 break;
3214 }
3215 case FPPositiveInfinity: {
3216 // Take care of correctly handling the range ]-1.0, -0.0], which must
3217 // yield -0.0.
3218 if ((-1.0 < value) && (value < 0.0)) {
3219 int_result = -0.0;
3220
3221 // If the error is non-zero, round up.
3222 } else if (error > 0.0) {
3223 int_result++;
3224 }
3225 break;
3226 }
3227 default:
3228 UNIMPLEMENTED();
3229 }
3230 return int_result;
3231 }
3232
3233 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3234 value = FPRoundInt(value, rmode);
3235 if (value >= kWMaxInt) {
3236 return kWMaxInt;
3237 } else if (value < kWMinInt) {
3238 return kWMinInt;
3239 }
3240 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3241 }
3242
3243 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3244 value = FPRoundInt(value, rmode);
3245 if (value >= kXMaxInt) {
3246 return kXMaxInt;
3247 } else if (value < kXMinInt) {
3248 return kXMinInt;
3249 }
3250 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3251 }
3252
3253 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3254 value = FPRoundInt(value, rmode);
3255 if (value >= kWMaxUInt) {
3256 return kWMaxUInt;
3257 } else if (value < 0.0) {
3258 return 0;
3259 }
3260 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3261 }
3262
3263 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3264 value = FPRoundInt(value, rmode);
3265 if (value >= kXMaxUInt) {
3266 return kXMaxUInt;
3267 } else if (value < 0.0) {
3268 return 0;
3269 }
3270 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3271 }
3272
3273 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
3274 template <typename T> \
3275 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3276 const LogicVRegister& src1, \
3277 const LogicVRegister& src2) { \
3278 dst.ClearForWrite(vform); \
3279 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
3280 T op1 = src1.Float<T>(i); \
3281 T op2 = src2.Float<T>(i); \
3282 T result; \
3283 if (PROCNAN) { \
3284 result = FPProcessNaNs(op1, op2); \
3285 if (!std::isnan(result)) { \
3286 result = OP(op1, op2); \
3287 } \
3288 } else { \
3289 result = OP(op1, op2); \
3290 } \
3291 dst.SetFloat(i, result); \
3292 } \
3293 return dst; \
3294 } \
3295 \
3296 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3297 const LogicVRegister& src1, \
3298 const LogicVRegister& src2) { \
3299 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \
3300 FN<float>(vform, dst, src1, src2); \
3301 } else { \
3302 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \
3303 FN<double>(vform, dst, src1, src2); \
3304 } \
3305 return dst; \
3306 }
3307 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3308 #undef DEFINE_NEON_FP_VECTOR_OP
3309
3310 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3311 const LogicVRegister& src1,
3312 const LogicVRegister& src2) {
3313 SimVRegister temp;
3314 LogicVRegister product = fmul(vform, temp, src1, src2);
3315 return fneg(vform, dst, product);
3316 }
3317
3318 template <typename T>
3319 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3320 const LogicVRegister& src1,
3321 const LogicVRegister& src2) {
3322 dst.ClearForWrite(vform);
3323 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3324 T op1 = -src1.Float<T>(i);
3325 T op2 = src2.Float<T>(i);
3326 T result = FPProcessNaNs(op1, op2);
3327 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3328 }
3329 return dst;
3330 }
3331
3332 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3333 const LogicVRegister& src1,
3334 const LogicVRegister& src2) {
3335 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3336 frecps<float>(vform, dst, src1, src2);
3337 } else {
3338 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3339 frecps<double>(vform, dst, src1, src2);
3340 }
3341 return dst;
3342 }
3343
3344 template <typename T>
3345 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3346 const LogicVRegister& src1,
3347 const LogicVRegister& src2) {
3348 dst.ClearForWrite(vform);
3349 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3350 T op1 = -src1.Float<T>(i);
3351 T op2 = src2.Float<T>(i);
3352 T result = FPProcessNaNs(op1, op2);
3353 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3354 }
3355 return dst;
3356 }
3357
3358 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3359 const LogicVRegister& src1,
3360 const LogicVRegister& src2) {
3361 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3362 frsqrts<float>(vform, dst, src1, src2);
3363 } else {
3364 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3365 frsqrts<double>(vform, dst, src1, src2);
3366 }
3367 return dst;
3368 }
3369
3370 template <typename T>
3371 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3372 const LogicVRegister& src1,
3373 const LogicVRegister& src2, Condition cond) {
3374 dst.ClearForWrite(vform);
3375 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3376 bool result = false;
3377 T op1 = src1.Float<T>(i);
3378 T op2 = src2.Float<T>(i);
3379 T nan_result = FPProcessNaNs(op1, op2);
3380 if (!std::isnan(nan_result)) {
3381 switch (cond) {
3382 case eq:
3383 result = (op1 == op2);
3384 break;
3385 case ge:
3386 result = (op1 >= op2);
3387 break;
3388 case gt:
3389 result = (op1 > op2);
3390 break;
3391 case le:
3392 result = (op1 <= op2);
3393 break;
3394 case lt:
3395 result = (op1 < op2);
3396 break;
3397 default:
3398 UNREACHABLE();
3399 break;
3400 }
3401 }
3402 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3403 }
3404 return dst;
3405 }
3406
3407 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3408 const LogicVRegister& src1,
3409 const LogicVRegister& src2, Condition cond) {
3410 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3411 fcmp<float>(vform, dst, src1, src2, cond);
3412 } else {
3413 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3414 fcmp<double>(vform, dst, src1, src2, cond);
3415 }
3416 return dst;
3417 }
3418
3419 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3420 const LogicVRegister& src, Condition cond) {
3421 SimVRegister temp;
3422 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3423 LogicVRegister zero_reg =
3424 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3425 fcmp<float>(vform, dst, src, zero_reg, cond);
3426 } else {
3427 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3428 LogicVRegister zero_reg =
3429 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3430 fcmp<double>(vform, dst, src, zero_reg, cond);
3431 }
3432 return dst;
3433 }
3434
3435 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3436 const LogicVRegister& src1,
3437 const LogicVRegister& src2, Condition cond) {
3438 SimVRegister temp1, temp2;
3439 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3440 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3441 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3442 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3443 } else {
3444 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3445 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3446 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3447 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3448 }
3449 return dst;
3450 }
3451
3452 template <typename T>
3453 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3454 const LogicVRegister& src1,
3455 const LogicVRegister& src2) {
3456 dst.ClearForWrite(vform);
3457 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3458 T op1 = src1.Float<T>(i);
3459 T op2 = src2.Float<T>(i);
3460 T acc = dst.Float<T>(i);
3461 T result = FPMulAdd(acc, op1, op2);
3462 dst.SetFloat(i, result);
3463 }
3464 return dst;
3465 }
3466
3467 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3468 const LogicVRegister& src1,
3469 const LogicVRegister& src2) {
3470 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3471 fmla<float>(vform, dst, src1, src2);
3472 } else {
3473 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3474 fmla<double>(vform, dst, src1, src2);
3475 }
3476 return dst;
3477 }
3478
3479 template <typename T>
3480 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3481 const LogicVRegister& src1,
3482 const LogicVRegister& src2) {
3483 dst.ClearForWrite(vform);
3484 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3485 T op1 = -src1.Float<T>(i);
3486 T op2 = src2.Float<T>(i);
3487 T acc = dst.Float<T>(i);
3488 T result = FPMulAdd(acc, op1, op2);
3489 dst.SetFloat(i, result);
3490 }
3491 return dst;
3492 }
3493
3494 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3495 const LogicVRegister& src1,
3496 const LogicVRegister& src2) {
3497 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3498 fmls<float>(vform, dst, src1, src2);
3499 } else {
3500 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3501 fmls<double>(vform, dst, src1, src2);
3502 }
3503 return dst;
3504 }
3505
3506 template <typename T>
3507 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3508 const LogicVRegister& src) {
3509 dst.ClearForWrite(vform);
3510 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3511 T op = src.Float<T>(i);
3512 op = -op;
3513 dst.SetFloat(i, op);
3514 }
3515 return dst;
3516 }
3517
3518 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3519 const LogicVRegister& src) {
3520 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3521 fneg<float>(vform, dst, src);
3522 } else {
3523 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3524 fneg<double>(vform, dst, src);
3525 }
3526 return dst;
3527 }
3528
3529 template <typename T>
3530 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3531 const LogicVRegister& src) {
3532 dst.ClearForWrite(vform);
3533 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3534 T op = src.Float<T>(i);
3535 if (copysign(1.0, op) < 0.0) {
3536 op = -op;
3537 }
3538 dst.SetFloat(i, op);
3539 }
3540 return dst;
3541 }
3542
3543 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3544 const LogicVRegister& src) {
3545 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3546 fabs_<float>(vform, dst, src);
3547 } else {
3548 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3549 fabs_<double>(vform, dst, src);
3550 }
3551 return dst;
3552 }
3553
3554 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3555 const LogicVRegister& src1,
3556 const LogicVRegister& src2) {
3557 SimVRegister temp;
3558 fsub(vform, temp, src1, src2);
3559 fabs_(vform, dst, temp);
3560 return dst;
3561 }
3562
3563 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3564 const LogicVRegister& src) {
3565 dst.ClearForWrite(vform);
3566 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3567 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3568 float result = FPSqrt(src.Float<float>(i));
3569 dst.SetFloat(i, result);
3570 }
3571 } else {
3572 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3573 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3574 double result = FPSqrt(src.Float<double>(i));
3575 dst.SetFloat(i, result);
3576 }
3577 }
3578 return dst;
3579 }
3580
3581 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
3582 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3583 const LogicVRegister& src1, \
3584 const LogicVRegister& src2) { \
3585 SimVRegister temp1, temp2; \
3586 uzp1(vform, temp1, src1, src2); \
3587 uzp2(vform, temp2, src1, src2); \
3588 FN(vform, dst, temp1, temp2); \
3589 return dst; \
3590 } \
3591 \
3592 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3593 const LogicVRegister& src) { \
3594 if (vform == kFormatS) { \
3595 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
3596 dst.SetFloat(0, result); \
3597 } else { \
3598 DCHECK_EQ(vform, kFormatD); \
3599 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
3600 dst.SetFloat(0, result); \
3601 } \
3602 dst.ClearForWrite(vform); \
3603 return dst; \
3604 }
3605 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3606 #undef DEFINE_NEON_FP_PAIR_OP
3607
3608 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3609 const LogicVRegister& src, FPMinMaxOp Op) {
3610 DCHECK_EQ(vform, kFormat4S);
3611 USE(vform);
3612 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3613 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3614 float result = (this->*Op)(result1, result2);
3615 dst.ClearForWrite(kFormatS);
3616 dst.SetFloat<float>(0, result);
3617 return dst;
3618 }
3619
3620 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3621 const LogicVRegister& src) {
3622 return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3623 }
3624
3625 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3626 const LogicVRegister& src) {
3627 return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3628 }
3629
3630 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3631 const LogicVRegister& src) {
3632 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3633 }
3634
3635 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3636 const LogicVRegister& src) {
3637 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3638 }
3639
3640 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3641 const LogicVRegister& src1,
3642 const LogicVRegister& src2, int index) {
3643 dst.ClearForWrite(vform);
3644 SimVRegister temp;
3645 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3646 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3647 fmul<float>(vform, dst, src1, index_reg);
3648 } else {
3649 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3650 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3651 fmul<double>(vform, dst, src1, index_reg);
3652 }
3653 return dst;
3654 }
3655
3656 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3657 const LogicVRegister& src1,
3658 const LogicVRegister& src2, int index) {
3659 dst.ClearForWrite(vform);
3660 SimVRegister temp;
3661 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3662 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3663 fmla<float>(vform, dst, src1, index_reg);
3664 } else {
3665 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3666 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3667 fmla<double>(vform, dst, src1, index_reg);
3668 }
3669 return dst;
3670 }
3671
3672 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3673 const LogicVRegister& src1,
3674 const LogicVRegister& src2, int index) {
3675 dst.ClearForWrite(vform);
3676 SimVRegister temp;
3677 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3678 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3679 fmls<float>(vform, dst, src1, index_reg);
3680 } else {
3681 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3682 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3683 fmls<double>(vform, dst, src1, index_reg);
3684 }
3685 return dst;
3686 }
3687
3688 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3689 const LogicVRegister& src1,
3690 const LogicVRegister& src2, int index) {
3691 dst.ClearForWrite(vform);
3692 SimVRegister temp;
3693 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3694 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3695 fmulx<float>(vform, dst, src1, index_reg);
3696
3697 } else {
3698 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3699 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3700 fmulx<double>(vform, dst, src1, index_reg);
3701 }
3702 return dst;
3703 }
3704
3705 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3706 const LogicVRegister& src,
3707 FPRounding rounding_mode,
3708 bool inexact_exception) {
3709 dst.ClearForWrite(vform);
3710 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3711 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3712 float input = src.Float<float>(i);
3713 float rounded = FPRoundInt(input, rounding_mode);
3714 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3715 FPProcessException();
3716 }
3717 dst.SetFloat<float>(i, rounded);
3718 }
3719 } else {
3720 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3721 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3722 double input = src.Float<double>(i);
3723 double rounded = FPRoundInt(input, rounding_mode);
3724 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3725 FPProcessException();
3726 }
3727 dst.SetFloat<double>(i, rounded);
3728 }
3729 }
3730 return dst;
3731 }
3732
3733 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3734 const LogicVRegister& src,
3735 FPRounding rounding_mode, int fbits) {
3736 dst.ClearForWrite(vform);
3737 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3738 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3739 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3740 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3741 }
3742 } else {
3743 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3744 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3745 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3746 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3747 }
3748 }
3749 return dst;
3750 }
3751
3752 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3753 const LogicVRegister& src,
3754 FPRounding rounding_mode, int fbits) {
3755 dst.ClearForWrite(vform);
3756 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3757 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3758 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3759 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3760 }
3761 } else {
3762 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3763 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3764 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3765 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3766 }
3767 }
3768 return dst;
3769 }
3770
3771 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3772 const LogicVRegister& src) {
3773 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3774 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3775 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3776 }
3777 } else {
3778 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3779 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3780 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3781 }
3782 }
3783 return dst;
3784 }
3785
3786 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3787 const LogicVRegister& src) {
3788 int lane_count = LaneCountFromFormat(vform);
3789 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3790 for (int i = 0; i < lane_count; i++) {
3791 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3792 }
3793 } else {
3794 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3795 for (int i = 0; i < lane_count; i++) {
3796 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3797 }
3798 }
3799 return dst;
3800 }
3801
3802 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3803 const LogicVRegister& src) {
3804 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3805 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3806 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3807 }
3808 } else {
3809 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3810 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3811 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3812 }
3813 }
3814 return dst;
3815 }
3816
3817 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3818 const LogicVRegister& src) {
3819 int lane_count = LaneCountFromFormat(vform) / 2;
3820 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3821 for (int i = lane_count - 1; i >= 0; i--) {
3822 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3823 }
3824 } else {
3825 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3826 for (int i = lane_count - 1; i >= 0; i--) {
3827 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3828 }
3829 }
3830 return dst;
3831 }
3832
3833 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3834 const LogicVRegister& src) {
3835 dst.ClearForWrite(vform);
3836 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3837 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3838 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3839 }
3840 return dst;
3841 }
3842
3843 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3844 const LogicVRegister& src) {
3845 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3846 int lane_count = LaneCountFromFormat(vform) / 2;
3847 for (int i = lane_count - 1; i >= 0; i--) {
3848 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3849 }
3850 return dst;
3851 }
3852
3853 // Based on reference C function recip_sqrt_estimate from ARM ARM.
3854 double Simulator::recip_sqrt_estimate(double a) {
3855 int q0, q1, s;
3856 double r;
3857 if (a < 0.5) {
3858 q0 = static_cast<int>(a * 512.0);
3859 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3860 } else {
3861 q1 = static_cast<int>(a * 256.0);
3862 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3863 }
3864 s = static_cast<int>(256.0 * r + 0.5);
3865 return static_cast<double>(s) / 256.0;
3866 }
3867
3868 namespace {
3869
3870 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3871 return unsigned_bitextract_64(start_bit, end_bit, val);
3872 }
3873
3874 } // anonymous namespace
3875
3876 template <typename T>
3877 T Simulator::FPRecipSqrtEstimate(T op) {
3878 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3879 "T must be a float or double");
3880
3881 if (std::isnan(op)) {
3882 return FPProcessNaN(op);
3883 } else if (op == 0.0) {
3884 if (copysign(1.0, op) < 0.0) {
3885 return kFP64NegativeInfinity;
3886 } else {
3887 return kFP64PositiveInfinity;
3888 }
3889 } else if (copysign(1.0, op) < 0.0) {
3890 FPProcessException();
3891 return FPDefaultNaN<T>();
3892 } else if (std::isinf(op)) {
3893 return 0.0;
3894 } else {
3895 uint64_t fraction;
3896 int32_t exp, result_exp;
3897
3898 if (sizeof(T) == sizeof(float)) {
3899 exp = static_cast<int32_t>(float_exp(op));
3900 fraction = float_mantissa(op);
3901 fraction <<= 29;
3902 } else {
3903 exp = static_cast<int32_t>(double_exp(op));
3904 fraction = double_mantissa(op);
3905 }
3906
3907 if (exp == 0) {
3908 while (Bits(fraction, 51, 51) == 0) {
3909 fraction = Bits(fraction, 50, 0) << 1;
3910 exp -= 1;
3911 }
3912 fraction = Bits(fraction, 50, 0) << 1;
3913 }
3914
3915 double scaled;
3916 if (Bits(exp, 0, 0) == 0) {
3917 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3918 } else {
3919 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3920 }
3921
3922 if (sizeof(T) == sizeof(float)) {
3923 result_exp = (380 - exp) / 2;
3924 } else {
3925 result_exp = (3068 - exp) / 2;
3926 }
3927
3928 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3929
3930 if (sizeof(T) == sizeof(float)) {
3931 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3932 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3933 return float_pack(0, exp_bits, est_bits);
3934 } else {
3935 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3936 }
3937 }
3938 }
3939
3940 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3941 const LogicVRegister& src) {
3942 dst.ClearForWrite(vform);
3943 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3944 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3945 float input = src.Float<float>(i);
3946 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3947 }
3948 } else {
3949 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3950 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3951 double input = src.Float<double>(i);
3952 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3953 }
3954 }
3955 return dst;
3956 }
3957
3958 template <typename T>
3959 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3960 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3961 "T must be a float or double");
3962 uint32_t sign;
3963
3964 if (sizeof(T) == sizeof(float)) {
3965 sign = float_sign(op);
3966 } else {
3967 sign = double_sign(op);
3968 }
3969
3970 if (std::isnan(op)) {
3971 return FPProcessNaN(op);
3972 } else if (std::isinf(op)) {
3973 return (sign == 1) ? -0.0 : 0.0;
3974 } else if (op == 0.0) {
3975 FPProcessException(); // FPExc_DivideByZero exception.
3976 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3977 } else if (((sizeof(T) == sizeof(float)) &&
3978 (std::fabs(op) < std::pow(2.0, -128.0))) ||
3979 ((sizeof(T) == sizeof(double)) &&
3980 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
3981 bool overflow_to_inf = false;
3982 switch (rounding) {
3983 case FPTieEven:
3984 overflow_to_inf = true;
3985 break;
3986 case FPPositiveInfinity:
3987 overflow_to_inf = (sign == 0);
3988 break;
3989 case FPNegativeInfinity:
3990 overflow_to_inf = (sign == 1);
3991 break;
3992 case FPZero:
3993 overflow_to_inf = false;
3994 break;
3995 default:
3996 break;
3997 }
3998 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
3999 if (overflow_to_inf) {
4000 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
4001 } else {
4002 // Return FPMaxNormal(sign).
4003 if (sizeof(T) == sizeof(float)) {
4004 return float_pack(sign, 0xfe, 0x07fffff);
4005 } else {
4006 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
4007 }
4008 }
4009 } else {
4010 uint64_t fraction;
4011 int32_t exp, result_exp;
4012 uint32_t sign;
4013
4014 if (sizeof(T) == sizeof(float)) {
4015 sign = float_sign(op);
4016 exp = static_cast<int32_t>(float_exp(op));
4017 fraction = float_mantissa(op);
4018 fraction <<= 29;
4019 } else {
4020 sign = double_sign(op);
4021 exp = static_cast<int32_t>(double_exp(op));
4022 fraction = double_mantissa(op);
4023 }
4024
4025 if (exp == 0) {
4026 if (Bits(fraction, 51, 51) == 0) {
4027 exp -= 1;
4028 fraction = Bits(fraction, 49, 0) << 2;
4029 } else {
4030 fraction = Bits(fraction, 50, 0) << 1;
4031 }
4032 }
4033
4034 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4035
4036 if (sizeof(T) == sizeof(float)) {
4037 result_exp = 253 - exp;
4038 } else {
4039 result_exp = 2045 - exp;
4040 }
4041
4042 double estimate = recip_estimate(scaled);
4043
4044 fraction = double_mantissa(estimate);
4045 if (result_exp == 0) {
4046 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4047 } else if (result_exp == -1) {
4048 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4049 result_exp = 0;
4050 }
4051 if (sizeof(T) == sizeof(float)) {
4052 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4053 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4054 return float_pack(sign, exp_bits, frac_bits);
4055 } else {
4056 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4057 }
4058 }
4059 }
4060
4061 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4062 const LogicVRegister& src, FPRounding round) {
4063 dst.ClearForWrite(vform);
4064 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4065 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4066 float input = src.Float<float>(i);
4067 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4068 }
4069 } else {
4070 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4072 double input = src.Float<double>(i);
4073 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4074 }
4075 }
4076 return dst;
4077 }
4078
4079 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4080 const LogicVRegister& src) {
4081 dst.ClearForWrite(vform);
4082 uint64_t operand;
4083 uint32_t result;
4084 double dp_operand, dp_result;
4085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4086 operand = src.Uint(vform, i);
4087 if (operand <= 0x3FFFFFFF) {
4088 result = 0xFFFFFFFF;
4089 } else {
4090 dp_operand = operand * std::pow(2.0, -32);
4091 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4092 result = static_cast<uint32_t>(dp_result);
4093 }
4094 dst.SetUint(vform, i, result);
4095 }
4096 return dst;
4097 }
4098
4099 // Based on reference C function recip_estimate from ARM ARM.
4100 double Simulator::recip_estimate(double a) {
4101 int q, s;
4102 double r;
4103 q = static_cast<int>(a * 512.0);
4104 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4105 s = static_cast<int>(256.0 * r + 0.5);
4106 return static_cast<double>(s) / 256.0;
4107 }
4108
4109 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4110 const LogicVRegister& src) {
4111 dst.ClearForWrite(vform);
4112 uint64_t operand;
4113 uint32_t result;
4114 double dp_operand, dp_result;
4115 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4116 operand = src.Uint(vform, i);
4117 if (operand <= 0x7FFFFFFF) {
4118 result = 0xFFFFFFFF;
4119 } else {
4120 dp_operand = operand * std::pow(2.0, -32);
4121 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4122 result = static_cast<uint32_t>(dp_result);
4123 }
4124 dst.SetUint(vform, i, result);
4125 }
4126 return dst;
4127 }
4128
4129 template <typename T>
4130 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4131 const LogicVRegister& src) {
4132 dst.ClearForWrite(vform);
4133 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4134 T op = src.Float<T>(i);
4135 T result;
4136 if (std::isnan(op)) {
4137 result = FPProcessNaN(op);
4138 } else {
4139 int exp;
4140 uint32_t sign;
4141 if (sizeof(T) == sizeof(float)) {
4142 sign = float_sign(op);
4143 exp = static_cast<int>(float_exp(op));
4144 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4145 result = float_pack(sign, exp, 0);
4146 } else {
4147 sign = double_sign(op);
4148 exp = static_cast<int>(double_exp(op));
4149 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4150 result = double_pack(sign, exp, 0);
4151 }
4152 }
4153 dst.SetFloat(i, result);
4154 }
4155 return dst;
4156 }
4157
4158 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4159 const LogicVRegister& src) {
4160 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4161 frecpx<float>(vform, dst, src);
4162 } else {
4163 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4164 frecpx<double>(vform, dst, src);
4165 }
4166 return dst;
4167 }
4168
4169 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4170 const LogicVRegister& src, int fbits,
4171 FPRounding round) {
4172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4173 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4174 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4175 dst.SetFloat<float>(i, result);
4176 } else {
4177 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4178 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4179 dst.SetFloat<double>(i, result);
4180 }
4181 }
4182 return dst;
4183 }
4184
4185 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4186 const LogicVRegister& src, int fbits,
4187 FPRounding round) {
4188 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4189 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4190 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4191 dst.SetFloat<float>(i, result);
4192 } else {
4193 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4194 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4195 dst.SetFloat<double>(i, result);
4196 }
4197 }
4198 return dst;
4199 }
4200
4201 #endif // USE_SIMULATOR
4202
4203 } // namespace internal
4204 } // namespace v8
4205
4206 #endif // V8_TARGET_ARCH_ARM64
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698