Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(137)

Side by Side Diff: src/arm64/simulator-logic-arm64.cc

Issue 2622643005: ARM64: Add NEON support (Closed)
Patch Set: Restore AreConsecutive change Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include <cmath>
8 #include "src/arm64/simulator-arm64.h"
9
10 namespace v8 {
11 namespace internal {
12
13 #if defined(USE_SIMULATOR)
14
15 namespace {
16
17 // See FPRound for a description of this function.
18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
19 FPRounding round_mode) {
20 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
21 sign, exponent, mantissa, round_mode);
22 return bit_cast<double>(bits);
23 }
24
25 // See FPRound for a description of this function.
26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
27 FPRounding round_mode) {
28 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
29 sign, exponent, mantissa, round_mode);
30 return bit_cast<float>(bits);
31 }
32
33 // See FPRound for a description of this function.
34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
35 uint64_t mantissa, FPRounding round_mode) {
36 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
37 sign, exponent, mantissa, round_mode);
38 }
39
40 } // namespace
41
42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
43 if (src >= 0) {
44 return UFixedToDouble(src, fbits, round);
45 } else if (src == INT64_MIN) {
46 return -UFixedToDouble(src, fbits, round);
47 } else {
48 return -UFixedToDouble(-src, fbits, round);
49 }
50 }
51
52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
53 // An input of 0 is a special case because the result is effectively
54 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
55 if (src == 0) {
56 return 0.0;
57 }
58
59 // Calculate the exponent. The highest significant bit will have the value
60 // 2^exponent.
61 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
62 const int64_t exponent = highest_significant_bit - fbits;
63
64 return FPRoundToDouble(0, exponent, src, round);
65 }
66
67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
68 if (src >= 0) {
69 return UFixedToFloat(src, fbits, round);
70 } else if (src == INT64_MIN) {
71 return -UFixedToFloat(src, fbits, round);
72 } else {
73 return -UFixedToFloat(-src, fbits, round);
74 }
75 }
76
77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
78 // An input of 0 is a special case because the result is effectively
79 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
80 if (src == 0) {
81 return 0.0f;
82 }
83
84 // Calculate the exponent. The highest significant bit will have the value
85 // 2^exponent.
86 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
87 const int32_t exponent = highest_significant_bit - fbits;
88
89 return FPRoundToFloat(0, exponent, src, round);
90 }
91
92 double Simulator::FPToDouble(float value) {
93 switch (std::fpclassify(value)) {
94 case FP_NAN: {
95 if (IsSignallingNaN(value)) {
96 FPProcessException();
97 }
98 if (DN()) return kFP64DefaultNaN;
99
100 // Convert NaNs as the processor would:
101 // - The sign is propagated.
102 // - The mantissa is transferred entirely, except that the top bit is
103 // forced to '1', making the result a quiet NaN. The unused (low-order)
104 // mantissa bits are set to 0.
105 uint32_t raw = bit_cast<uint32_t>(value);
106
107 uint64_t sign = raw >> 31;
108 uint64_t exponent = (1 << kDoubleExponentBits) - 1;
109 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
110
111 // Unused low-order bits remain zero.
112 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
113
114 // Force a quiet NaN.
115 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
116
117 return double_pack(sign, exponent, mantissa);
118 }
119
120 case FP_ZERO:
121 case FP_NORMAL:
122 case FP_SUBNORMAL:
123 case FP_INFINITE: {
124 // All other inputs are preserved in a standard cast, because every value
125 // representable using an IEEE-754 float is also representable using an
126 // IEEE-754 double.
127 return static_cast<double>(value);
128 }
129 }
130
131 UNREACHABLE();
132 return kFP64DefaultNaN;
133 }
134
135 float Simulator::FPToFloat(float16 value) {
136 uint32_t sign = value >> 15;
137 uint32_t exponent =
138 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
139 kFloat16MantissaBits, value);
140 uint32_t mantissa =
141 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
142
143 switch (float16classify(value)) {
144 case FP_ZERO:
145 return (sign == 0) ? 0.0f : -0.0f;
146
147 case FP_INFINITE:
148 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
149
150 case FP_SUBNORMAL: {
151 // Calculate shift required to put mantissa into the most-significant bits
152 // of the destination mantissa.
153 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
154
155 // Shift mantissa and discard implicit '1'.
156 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
157 mantissa &= (1 << kFloatMantissaBits) - 1;
158
159 // Adjust the exponent for the shift applied, and rebias.
160 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
161 break;
162 }
163
164 case FP_NAN: {
165 if (IsSignallingNaN(value)) {
166 FPProcessException();
167 }
168 if (DN()) return kFP32DefaultNaN;
169
170 // Convert NaNs as the processor would:
171 // - The sign is propagated.
172 // - The mantissa is transferred entirely, except that the top bit is
173 // forced to '1', making the result a quiet NaN. The unused (low-order)
174 // mantissa bits are set to 0.
175 exponent = (1 << kFloatExponentBits) - 1;
176
177 // Increase bits in mantissa, making low-order bits 0.
178 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
179 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.
180 break;
181 }
182
183 case FP_NORMAL: {
184 // Increase bits in mantissa, making low-order bits 0.
185 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
186
187 // Change exponent bias.
188 exponent += (kFloatExponentBias - kFloat16ExponentBias);
189 break;
190 }
191
192 default:
193 UNREACHABLE();
194 return kFP32DefaultNaN;
195 }
196 return float_pack(sign, exponent, mantissa);
197 }
198
199 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
200 // Only the FPTieEven rounding mode is implemented.
201 DCHECK_EQ(round_mode, FPTieEven);
202 USE(round_mode);
203
204 int64_t sign = float_sign(value);
205 int64_t exponent =
206 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
207 uint32_t mantissa = float_mantissa(value);
208
209 switch (std::fpclassify(value)) {
210 case FP_NAN: {
211 if (IsSignallingNaN(value)) {
212 FPProcessException();
213 }
214 if (DN()) return kFP16DefaultNaN;
215
216 // Convert NaNs as the processor would:
217 // - The sign is propagated.
218 // - The mantissa is transferred as much as possible, except that the top
219 // bit is forced to '1', making the result a quiet NaN.
220 float16 result =
221 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
222 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
223 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
224 return result;
225 }
226
227 case FP_ZERO:
228 return (sign == 0) ? 0 : 0x8000;
229
230 case FP_INFINITE:
231 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
232
233 case FP_NORMAL:
234 case FP_SUBNORMAL: {
235 // Convert float-to-half as the processor would, assuming that FPCR.FZ
236 // (flush-to-zero) is not set.
237
238 // Add the implicit '1' bit to the mantissa.
239 mantissa += (1 << kFloatMantissaBits);
240 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
241 }
242 }
243
244 UNREACHABLE();
245 return kFP16DefaultNaN;
246 }
247
248 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
249 // Only the FPTieEven rounding mode is implemented.
250 DCHECK_EQ(round_mode, FPTieEven);
251 USE(round_mode);
252
253 int64_t sign = double_sign(value);
254 int64_t exponent =
255 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
256 uint64_t mantissa = double_mantissa(value);
257
258 switch (std::fpclassify(value)) {
259 case FP_NAN: {
260 if (IsSignallingNaN(value)) {
261 FPProcessException();
262 }
263 if (DN()) return kFP16DefaultNaN;
264
265 // Convert NaNs as the processor would:
266 // - The sign is propagated.
267 // - The mantissa is transferred as much as possible, except that the top
268 // bit is forced to '1', making the result a quiet NaN.
269 float16 result =
270 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
271 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
272 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
273 return result;
274 }
275
276 case FP_ZERO:
277 return (sign == 0) ? 0 : 0x8000;
278
279 case FP_INFINITE:
280 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
281
282 case FP_NORMAL:
283 case FP_SUBNORMAL: {
284 // Convert double-to-half as the processor would, assuming that FPCR.FZ
285 // (flush-to-zero) is not set.
286
287 // Add the implicit '1' bit to the mantissa.
288 mantissa += (UINT64_C(1) << kDoubleMantissaBits);
289 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
290 }
291 }
292
293 UNREACHABLE();
294 return kFP16DefaultNaN;
295 }
296
297 float Simulator::FPToFloat(double value, FPRounding round_mode) {
298 // Only the FPTieEven rounding mode is implemented.
299 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
300 USE(round_mode);
301
302 switch (std::fpclassify(value)) {
303 case FP_NAN: {
304 if (IsSignallingNaN(value)) {
305 FPProcessException();
306 }
307 if (DN()) return kFP32DefaultNaN;
308
309 // Convert NaNs as the processor would:
310 // - The sign is propagated.
311 // - The mantissa is transferred as much as possible, except that the
312 // top bit is forced to '1', making the result a quiet NaN.
313
314 uint64_t raw = bit_cast<uint64_t>(value);
315
316 uint32_t sign = raw >> 63;
317 uint32_t exponent = (1 << 8) - 1;
318 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
319 50, kDoubleMantissaBits - kFloatMantissaBits, raw));
320 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.
321
322 return float_pack(sign, exponent, mantissa);
323 }
324
325 case FP_ZERO:
326 case FP_INFINITE: {
327 // In a C++ cast, any value representable in the target type will be
328 // unchanged. This is always the case for +/-0.0 and infinities.
329 return static_cast<float>(value);
330 }
331
332 case FP_NORMAL:
333 case FP_SUBNORMAL: {
334 // Convert double-to-float as the processor would, assuming that FPCR.FZ
335 // (flush-to-zero) is not set.
336 uint32_t sign = double_sign(value);
337 int64_t exponent =
338 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
339 uint64_t mantissa = double_mantissa(value);
340 if (std::fpclassify(value) == FP_NORMAL) {
341 // For normal FP values, add the hidden bit.
342 mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
343 }
344 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
345 }
346 }
347
348 UNREACHABLE();
349 return kFP32DefaultNaN;
350 }
351
352 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
353 dst.ClearForWrite(vform);
354 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
355 dst.ReadUintFromMem(vform, i, addr);
356 addr += LaneSizeInBytesFromFormat(vform);
357 }
358 }
359
360 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
361 uint64_t addr) {
362 dst.ReadUintFromMem(vform, index, addr);
363 }
364
365 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
366 dst.ClearForWrite(vform);
367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
368 dst.ReadUintFromMem(vform, i, addr);
369 }
370 }
371
372 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
373 LogicVRegister dst2, uint64_t addr1) {
374 dst1.ClearForWrite(vform);
375 dst2.ClearForWrite(vform);
376 int esize = LaneSizeInBytesFromFormat(vform);
377 uint64_t addr2 = addr1 + esize;
378 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
379 dst1.ReadUintFromMem(vform, i, addr1);
380 dst2.ReadUintFromMem(vform, i, addr2);
381 addr1 += 2 * esize;
382 addr2 += 2 * esize;
383 }
384 }
385
386 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
387 LogicVRegister dst2, int index, uint64_t addr1) {
388 dst1.ClearForWrite(vform);
389 dst2.ClearForWrite(vform);
390 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
391 dst1.ReadUintFromMem(vform, index, addr1);
392 dst2.ReadUintFromMem(vform, index, addr2);
393 }
394
395 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
396 LogicVRegister dst2, uint64_t addr) {
397 dst1.ClearForWrite(vform);
398 dst2.ClearForWrite(vform);
399 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
400 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
401 dst1.ReadUintFromMem(vform, i, addr);
402 dst2.ReadUintFromMem(vform, i, addr2);
403 }
404 }
405
406 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
407 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
408 dst1.ClearForWrite(vform);
409 dst2.ClearForWrite(vform);
410 dst3.ClearForWrite(vform);
411 int esize = LaneSizeInBytesFromFormat(vform);
412 uint64_t addr2 = addr1 + esize;
413 uint64_t addr3 = addr2 + esize;
414 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
415 dst1.ReadUintFromMem(vform, i, addr1);
416 dst2.ReadUintFromMem(vform, i, addr2);
417 dst3.ReadUintFromMem(vform, i, addr3);
418 addr1 += 3 * esize;
419 addr2 += 3 * esize;
420 addr3 += 3 * esize;
421 }
422 }
423
424 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
425 LogicVRegister dst2, LogicVRegister dst3, int index,
426 uint64_t addr1) {
427 dst1.ClearForWrite(vform);
428 dst2.ClearForWrite(vform);
429 dst3.ClearForWrite(vform);
430 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
431 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
432 dst1.ReadUintFromMem(vform, index, addr1);
433 dst2.ReadUintFromMem(vform, index, addr2);
434 dst3.ReadUintFromMem(vform, index, addr3);
435 }
436
437 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
438 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
439 dst1.ClearForWrite(vform);
440 dst2.ClearForWrite(vform);
441 dst3.ClearForWrite(vform);
442 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
443 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
444 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
445 dst1.ReadUintFromMem(vform, i, addr);
446 dst2.ReadUintFromMem(vform, i, addr2);
447 dst3.ReadUintFromMem(vform, i, addr3);
448 }
449 }
450
451 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
452 LogicVRegister dst2, LogicVRegister dst3,
453 LogicVRegister dst4, uint64_t addr1) {
454 dst1.ClearForWrite(vform);
455 dst2.ClearForWrite(vform);
456 dst3.ClearForWrite(vform);
457 dst4.ClearForWrite(vform);
458 int esize = LaneSizeInBytesFromFormat(vform);
459 uint64_t addr2 = addr1 + esize;
460 uint64_t addr3 = addr2 + esize;
461 uint64_t addr4 = addr3 + esize;
462 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
463 dst1.ReadUintFromMem(vform, i, addr1);
464 dst2.ReadUintFromMem(vform, i, addr2);
465 dst3.ReadUintFromMem(vform, i, addr3);
466 dst4.ReadUintFromMem(vform, i, addr4);
467 addr1 += 4 * esize;
468 addr2 += 4 * esize;
469 addr3 += 4 * esize;
470 addr4 += 4 * esize;
471 }
472 }
473
474 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
475 LogicVRegister dst2, LogicVRegister dst3,
476 LogicVRegister dst4, int index, uint64_t addr1) {
477 dst1.ClearForWrite(vform);
478 dst2.ClearForWrite(vform);
479 dst3.ClearForWrite(vform);
480 dst4.ClearForWrite(vform);
481 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
482 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
483 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
484 dst1.ReadUintFromMem(vform, index, addr1);
485 dst2.ReadUintFromMem(vform, index, addr2);
486 dst3.ReadUintFromMem(vform, index, addr3);
487 dst4.ReadUintFromMem(vform, index, addr4);
488 }
489
490 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
491 LogicVRegister dst2, LogicVRegister dst3,
492 LogicVRegister dst4, uint64_t addr) {
493 dst1.ClearForWrite(vform);
494 dst2.ClearForWrite(vform);
495 dst3.ClearForWrite(vform);
496 dst4.ClearForWrite(vform);
497 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
498 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
499 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
500 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
501 dst1.ReadUintFromMem(vform, i, addr);
502 dst2.ReadUintFromMem(vform, i, addr2);
503 dst3.ReadUintFromMem(vform, i, addr3);
504 dst4.ReadUintFromMem(vform, i, addr4);
505 }
506 }
507
508 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
509 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
510 src.WriteUintToMem(vform, i, addr);
511 addr += LaneSizeInBytesFromFormat(vform);
512 }
513 }
514
515 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
516 uint64_t addr) {
517 src.WriteUintToMem(vform, index, addr);
518 }
519
520 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
521 uint64_t addr) {
522 int esize = LaneSizeInBytesFromFormat(vform);
523 uint64_t addr2 = addr + esize;
524 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
525 dst.WriteUintToMem(vform, i, addr);
526 dst2.WriteUintToMem(vform, i, addr2);
527 addr += 2 * esize;
528 addr2 += 2 * esize;
529 }
530 }
531
532 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
533 int index, uint64_t addr) {
534 int esize = LaneSizeInBytesFromFormat(vform);
535 dst.WriteUintToMem(vform, index, addr);
536 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
537 }
538
539 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
540 LogicVRegister dst3, uint64_t addr) {
541 int esize = LaneSizeInBytesFromFormat(vform);
542 uint64_t addr2 = addr + esize;
543 uint64_t addr3 = addr2 + esize;
544 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
545 dst.WriteUintToMem(vform, i, addr);
546 dst2.WriteUintToMem(vform, i, addr2);
547 dst3.WriteUintToMem(vform, i, addr3);
548 addr += 3 * esize;
549 addr2 += 3 * esize;
550 addr3 += 3 * esize;
551 }
552 }
553
554 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
555 LogicVRegister dst3, int index, uint64_t addr) {
556 int esize = LaneSizeInBytesFromFormat(vform);
557 dst.WriteUintToMem(vform, index, addr);
558 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
559 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
560 }
561
562 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
563 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
564 int esize = LaneSizeInBytesFromFormat(vform);
565 uint64_t addr2 = addr + esize;
566 uint64_t addr3 = addr2 + esize;
567 uint64_t addr4 = addr3 + esize;
568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569 dst.WriteUintToMem(vform, i, addr);
570 dst2.WriteUintToMem(vform, i, addr2);
571 dst3.WriteUintToMem(vform, i, addr3);
572 dst4.WriteUintToMem(vform, i, addr4);
573 addr += 4 * esize;
574 addr2 += 4 * esize;
575 addr3 += 4 * esize;
576 addr4 += 4 * esize;
577 }
578 }
579
580 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
581 LogicVRegister dst3, LogicVRegister dst4, int index,
582 uint64_t addr) {
583 int esize = LaneSizeInBytesFromFormat(vform);
584 dst.WriteUintToMem(vform, index, addr);
585 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
586 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
587 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
588 }
589
590 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
591 const LogicVRegister& src1,
592 const LogicVRegister& src2, Condition cond) {
593 dst.ClearForWrite(vform);
594 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
595 int64_t sa = src1.Int(vform, i);
596 int64_t sb = src2.Int(vform, i);
597 uint64_t ua = src1.Uint(vform, i);
598 uint64_t ub = src2.Uint(vform, i);
599 bool result = false;
600 switch (cond) {
601 case eq:
602 result = (ua == ub);
603 break;
604 case ge:
605 result = (sa >= sb);
606 break;
607 case gt:
608 result = (sa > sb);
609 break;
610 case hi:
611 result = (ua > ub);
612 break;
613 case hs:
614 result = (ua >= ub);
615 break;
616 case lt:
617 result = (sa < sb);
618 break;
619 case le:
620 result = (sa <= sb);
621 break;
622 default:
623 UNREACHABLE();
624 break;
625 }
626 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
627 }
628 return dst;
629 }
630
631 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
632 const LogicVRegister& src1, int imm,
633 Condition cond) {
634 SimVRegister temp;
635 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
636 return cmp(vform, dst, src1, imm_reg, cond);
637 }
638
639 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
640 const LogicVRegister& src1,
641 const LogicVRegister& src2) {
642 dst.ClearForWrite(vform);
643 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
644 uint64_t ua = src1.Uint(vform, i);
645 uint64_t ub = src2.Uint(vform, i);
646 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
647 }
648 return dst;
649 }
650
651 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
652 const LogicVRegister& src1,
653 const LogicVRegister& src2) {
654 int lane_size = LaneSizeInBitsFromFormat(vform);
655 dst.ClearForWrite(vform);
656 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
657 // Test for unsigned saturation.
658 uint64_t ua = src1.UintLeftJustified(vform, i);
659 uint64_t ub = src2.UintLeftJustified(vform, i);
660 uint64_t ur = ua + ub;
661 if (ur < ua) {
662 dst.SetUnsignedSat(i, true);
663 }
664
665 // Test for signed saturation.
666 bool pos_a = (ua >> 63) == 0;
667 bool pos_b = (ub >> 63) == 0;
668 bool pos_r = (ur >> 63) == 0;
669 // If the signs of the operands are the same, but different from the result,
670 // there was an overflow.
671 if ((pos_a == pos_b) && (pos_a != pos_r)) {
672 dst.SetSignedSat(i, pos_a);
673 }
674
675 dst.SetInt(vform, i, ur >> (64 - lane_size));
676 }
677 return dst;
678 }
679
680 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
681 const LogicVRegister& src1,
682 const LogicVRegister& src2) {
683 SimVRegister temp1, temp2;
684 uzp1(vform, temp1, src1, src2);
685 uzp2(vform, temp2, src1, src2);
686 add(vform, dst, temp1, temp2);
687 return dst;
688 }
689
690 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
691 const LogicVRegister& src1,
692 const LogicVRegister& src2) {
693 SimVRegister temp;
694 mul(vform, temp, src1, src2);
695 add(vform, dst, dst, temp);
696 return dst;
697 }
698
699 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
700 const LogicVRegister& src1,
701 const LogicVRegister& src2) {
702 SimVRegister temp;
703 mul(vform, temp, src1, src2);
704 sub(vform, dst, dst, temp);
705 return dst;
706 }
707
708 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
709 const LogicVRegister& src1,
710 const LogicVRegister& src2) {
711 dst.ClearForWrite(vform);
712 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
713 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
714 }
715 return dst;
716 }
717
718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
719 const LogicVRegister& src1,
720 const LogicVRegister& src2, int index) {
721 SimVRegister temp;
722 VectorFormat indexform = VectorFormatFillQ(vform);
723 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
724 }
725
726 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
727 const LogicVRegister& src1,
728 const LogicVRegister& src2, int index) {
729 SimVRegister temp;
730 VectorFormat indexform = VectorFormatFillQ(vform);
731 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
732 }
733
734 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
735 const LogicVRegister& src1,
736 const LogicVRegister& src2, int index) {
737 SimVRegister temp;
738 VectorFormat indexform = VectorFormatFillQ(vform);
739 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
740 }
741
742 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
743 const LogicVRegister& src1,
744 const LogicVRegister& src2, int index) {
745 SimVRegister temp;
746 VectorFormat indexform =
747 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
748 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
749 }
750
751 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
752 const LogicVRegister& src1,
753 const LogicVRegister& src2, int index) {
754 SimVRegister temp;
755 VectorFormat indexform =
756 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
757 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
758 }
759
760 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
761 const LogicVRegister& src1,
762 const LogicVRegister& src2, int index) {
763 SimVRegister temp;
764 VectorFormat indexform =
765 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
766 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
767 }
768
769 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
770 const LogicVRegister& src1,
771 const LogicVRegister& src2, int index) {
772 SimVRegister temp;
773 VectorFormat indexform =
774 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
775 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
776 }
777
778 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
779 const LogicVRegister& src1,
780 const LogicVRegister& src2, int index) {
781 SimVRegister temp;
782 VectorFormat indexform =
783 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
784 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
785 }
786
787 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
788 const LogicVRegister& src1,
789 const LogicVRegister& src2, int index) {
790 SimVRegister temp;
791 VectorFormat indexform =
792 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
793 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
794 }
795
796 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
797 const LogicVRegister& src1,
798 const LogicVRegister& src2, int index) {
799 SimVRegister temp;
800 VectorFormat indexform =
801 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
802 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
803 }
804
805 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
806 const LogicVRegister& src1,
807 const LogicVRegister& src2, int index) {
808 SimVRegister temp;
809 VectorFormat indexform =
810 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
811 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
812 }
813
814 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
815 const LogicVRegister& src1,
816 const LogicVRegister& src2, int index) {
817 SimVRegister temp;
818 VectorFormat indexform =
819 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
820 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
821 }
822
823 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
824 const LogicVRegister& src1,
825 const LogicVRegister& src2, int index) {
826 SimVRegister temp;
827 VectorFormat indexform =
828 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
829 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
830 }
831
832 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
833 const LogicVRegister& src1,
834 const LogicVRegister& src2, int index) {
835 SimVRegister temp;
836 VectorFormat indexform =
837 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
838 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
839 }
840
841 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
842 const LogicVRegister& src1,
843 const LogicVRegister& src2, int index) {
844 SimVRegister temp;
845 VectorFormat indexform =
846 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
847 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
848 }
849
850 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
851 const LogicVRegister& src1,
852 const LogicVRegister& src2, int index) {
853 SimVRegister temp;
854 VectorFormat indexform =
855 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
856 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
857 }
858
859 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
860 const LogicVRegister& src1,
861 const LogicVRegister& src2, int index) {
862 SimVRegister temp;
863 VectorFormat indexform =
864 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
865 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
866 }
867
868 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
869 const LogicVRegister& src1,
870 const LogicVRegister& src2, int index) {
871 SimVRegister temp;
872 VectorFormat indexform =
873 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
874 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
875 }
876
877 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
878 const LogicVRegister& src1,
879 const LogicVRegister& src2, int index) {
880 SimVRegister temp;
881 VectorFormat indexform =
882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
883 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
884 }
885
886 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
887 const LogicVRegister& src1,
888 const LogicVRegister& src2, int index) {
889 SimVRegister temp;
890 VectorFormat indexform =
891 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
892 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
893 }
894
895 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
896 const LogicVRegister& src1,
897 const LogicVRegister& src2, int index) {
898 SimVRegister temp;
899 VectorFormat indexform =
900 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
901 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
902 }
903
904 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
905 const LogicVRegister& src1,
906 const LogicVRegister& src2, int index) {
907 SimVRegister temp;
908 VectorFormat indexform = VectorFormatFillQ(vform);
909 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
910 }
911
912 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
913 const LogicVRegister& src1,
914 const LogicVRegister& src2, int index) {
915 SimVRegister temp;
916 VectorFormat indexform = VectorFormatFillQ(vform);
917 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
918 }
919
920 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
921 uint16_t result = 0;
922 uint16_t extended_op2 = op2;
923 for (int i = 0; i < 8; ++i) {
924 if ((op1 >> i) & 1) {
925 result = result ^ (extended_op2 << i);
926 }
927 }
928 return result;
929 }
930
931 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
932 const LogicVRegister& src1,
933 const LogicVRegister& src2) {
934 dst.ClearForWrite(vform);
935 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
936 dst.SetUint(vform, i,
937 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
938 }
939 return dst;
940 }
941
942 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
943 const LogicVRegister& src1,
944 const LogicVRegister& src2) {
945 VectorFormat vform_src = VectorFormatHalfWidth(vform);
946 dst.ClearForWrite(vform);
947 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
948 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
949 src2.Uint(vform_src, i)));
950 }
951 return dst;
952 }
953
954 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
955 const LogicVRegister& src1,
956 const LogicVRegister& src2) {
957 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
958 dst.ClearForWrite(vform);
959 int lane_count = LaneCountFromFormat(vform);
960 for (int i = 0; i < lane_count; i++) {
961 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
962 src2.Uint(vform_src, lane_count + i)));
963 }
964 return dst;
965 }
966
967 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
968 const LogicVRegister& src1,
969 const LogicVRegister& src2) {
970 int lane_size = LaneSizeInBitsFromFormat(vform);
971 dst.ClearForWrite(vform);
972 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
973 // Test for unsigned saturation.
974 uint64_t ua = src1.UintLeftJustified(vform, i);
975 uint64_t ub = src2.UintLeftJustified(vform, i);
976 uint64_t ur = ua - ub;
977 if (ub > ua) {
978 dst.SetUnsignedSat(i, false);
979 }
980
981 // Test for signed saturation.
982 bool pos_a = (ua >> 63) == 0;
983 bool pos_b = (ub >> 63) == 0;
984 bool pos_r = (ur >> 63) == 0;
985 // If the signs of the operands are different, and the sign of the first
986 // operand doesn't match the result, there was an overflow.
987 if ((pos_a != pos_b) && (pos_a != pos_r)) {
988 dst.SetSignedSat(i, pos_a);
989 }
990
991 dst.SetInt(vform, i, ur >> (64 - lane_size));
992 }
993 return dst;
994 }
995
996 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
997 const LogicVRegister& src1,
998 const LogicVRegister& src2) {
999 dst.ClearForWrite(vform);
1000 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1001 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1002 }
1003 return dst;
1004 }
1005
1006 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1007 const LogicVRegister& src1,
1008 const LogicVRegister& src2) {
1009 dst.ClearForWrite(vform);
1010 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1011 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1012 }
1013 return dst;
1014 }
1015
1016 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1017 const LogicVRegister& src1,
1018 const LogicVRegister& src2) {
1019 dst.ClearForWrite(vform);
1020 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1021 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1022 }
1023 return dst;
1024 }
1025
1026 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1027 const LogicVRegister& src1,
1028 const LogicVRegister& src2) {
1029 dst.ClearForWrite(vform);
1030 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1031 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1032 }
1033 return dst;
1034 }
1035
1036 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1037 const LogicVRegister& src1,
1038 const LogicVRegister& src2) {
1039 dst.ClearForWrite(vform);
1040 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1041 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1042 }
1043 return dst;
1044 }
1045
1046 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1047 const LogicVRegister& src, uint64_t imm) {
1048 uint64_t result[16];
1049 int laneCount = LaneCountFromFormat(vform);
1050 for (int i = 0; i < laneCount; ++i) {
1051 result[i] = src.Uint(vform, i) & ~imm;
1052 }
1053 dst.SetUintArray(vform, result);
1054 return dst;
1055 }
1056
1057 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1058 const LogicVRegister& src1,
1059 const LogicVRegister& src2) {
1060 dst.ClearForWrite(vform);
1061 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1062 uint64_t operand1 = dst.Uint(vform, i);
1063 uint64_t operand2 = ~src2.Uint(vform, i);
1064 uint64_t operand3 = src1.Uint(vform, i);
1065 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1066 dst.SetUint(vform, i, result);
1067 }
1068 return dst;
1069 }
1070
1071 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1072 const LogicVRegister& src1,
1073 const LogicVRegister& src2) {
1074 dst.ClearForWrite(vform);
1075 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1076 uint64_t operand1 = dst.Uint(vform, i);
1077 uint64_t operand2 = src2.Uint(vform, i);
1078 uint64_t operand3 = src1.Uint(vform, i);
1079 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1080 dst.SetUint(vform, i, result);
1081 }
1082 return dst;
1083 }
1084
1085 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1086 const LogicVRegister& src1,
1087 const LogicVRegister& src2) {
1088 dst.ClearForWrite(vform);
1089 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1090 uint64_t operand1 = src2.Uint(vform, i);
1091 uint64_t operand2 = dst.Uint(vform, i);
1092 uint64_t operand3 = src1.Uint(vform, i);
1093 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1094 dst.SetUint(vform, i, result);
1095 }
1096 return dst;
1097 }
1098
1099 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1100 const LogicVRegister& src1,
1101 const LogicVRegister& src2, bool max) {
1102 dst.ClearForWrite(vform);
1103 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1104 int64_t src1_val = src1.Int(vform, i);
1105 int64_t src2_val = src2.Int(vform, i);
1106 int64_t dst_val;
1107 if (max) {
1108 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1109 } else {
1110 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1111 }
1112 dst.SetInt(vform, i, dst_val);
1113 }
1114 return dst;
1115 }
1116
1117 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1118 const LogicVRegister& src1,
1119 const LogicVRegister& src2) {
1120 return SMinMax(vform, dst, src1, src2, true);
1121 }
1122
1123 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1124 const LogicVRegister& src1,
1125 const LogicVRegister& src2) {
1126 return SMinMax(vform, dst, src1, src2, false);
1127 }
1128
1129 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1130 const LogicVRegister& src1,
1131 const LogicVRegister& src2, bool max) {
1132 int lanes = LaneCountFromFormat(vform);
1133 int64_t result[kMaxLanesPerVector];
1134 const LogicVRegister* src = &src1;
1135 for (int j = 0; j < 2; j++) {
1136 for (int i = 0; i < lanes; i += 2) {
1137 int64_t first_val = src->Int(vform, i);
1138 int64_t second_val = src->Int(vform, i + 1);
1139 int64_t dst_val;
1140 if (max) {
1141 dst_val = (first_val > second_val) ? first_val : second_val;
1142 } else {
1143 dst_val = (first_val < second_val) ? first_val : second_val;
1144 }
1145 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1146 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1147 }
1148 src = &src2;
1149 }
1150 dst.SetIntArray(vform, result);
1151 return dst;
1152 }
1153
1154 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1155 const LogicVRegister& src1,
1156 const LogicVRegister& src2) {
1157 return SMinMaxP(vform, dst, src1, src2, true);
1158 }
1159
1160 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1161 const LogicVRegister& src1,
1162 const LogicVRegister& src2) {
1163 return SMinMaxP(vform, dst, src1, src2, false);
1164 }
1165
1166 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1167 const LogicVRegister& src) {
1168 DCHECK_EQ(vform, kFormatD);
1169
1170 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1171 dst.ClearForWrite(vform);
1172 dst.SetUint(vform, 0, dst_val);
1173 return dst;
1174 }
1175
1176 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1177 const LogicVRegister& src) {
1178 VectorFormat vform_dst =
1179 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1180
1181 int64_t dst_val = 0;
1182 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1183 dst_val += src.Int(vform, i);
1184 }
1185
1186 dst.ClearForWrite(vform_dst);
1187 dst.SetInt(vform_dst, 0, dst_val);
1188 return dst;
1189 }
1190
1191 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1192 const LogicVRegister& src) {
1193 VectorFormat vform_dst =
1194 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1195
1196 int64_t dst_val = 0;
1197 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1198 dst_val += src.Int(vform, i);
1199 }
1200
1201 dst.ClearForWrite(vform_dst);
1202 dst.SetInt(vform_dst, 0, dst_val);
1203 return dst;
1204 }
1205
1206 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1207 const LogicVRegister& src) {
1208 VectorFormat vform_dst =
1209 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1210
1211 uint64_t dst_val = 0;
1212 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1213 dst_val += src.Uint(vform, i);
1214 }
1215
1216 dst.ClearForWrite(vform_dst);
1217 dst.SetUint(vform_dst, 0, dst_val);
1218 return dst;
1219 }
1220
1221 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1222 const LogicVRegister& src, bool max) {
1223 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1224 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1225 int64_t src_val = src.Int(vform, i);
1226 if (max) {
1227 dst_val = (src_val > dst_val) ? src_val : dst_val;
1228 } else {
1229 dst_val = (src_val < dst_val) ? src_val : dst_val;
1230 }
1231 }
1232 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1233 dst.SetInt(vform, 0, dst_val);
1234 return dst;
1235 }
1236
1237 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1238 const LogicVRegister& src) {
1239 SMinMaxV(vform, dst, src, true);
1240 return dst;
1241 }
1242
1243 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1244 const LogicVRegister& src) {
1245 SMinMaxV(vform, dst, src, false);
1246 return dst;
1247 }
1248
1249 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1250 const LogicVRegister& src1,
1251 const LogicVRegister& src2, bool max) {
1252 dst.ClearForWrite(vform);
1253 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1254 uint64_t src1_val = src1.Uint(vform, i);
1255 uint64_t src2_val = src2.Uint(vform, i);
1256 uint64_t dst_val;
1257 if (max) {
1258 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1259 } else {
1260 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1261 }
1262 dst.SetUint(vform, i, dst_val);
1263 }
1264 return dst;
1265 }
1266
1267 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1268 const LogicVRegister& src1,
1269 const LogicVRegister& src2) {
1270 return UMinMax(vform, dst, src1, src2, true);
1271 }
1272
1273 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1274 const LogicVRegister& src1,
1275 const LogicVRegister& src2) {
1276 return UMinMax(vform, dst, src1, src2, false);
1277 }
1278
1279 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1280 const LogicVRegister& src1,
1281 const LogicVRegister& src2, bool max) {
1282 int lanes = LaneCountFromFormat(vform);
1283 uint64_t result[kMaxLanesPerVector];
1284 const LogicVRegister* src = &src1;
1285 for (int j = 0; j < 2; j++) {
1286 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1287 uint64_t first_val = src->Uint(vform, i);
1288 uint64_t second_val = src->Uint(vform, i + 1);
1289 uint64_t dst_val;
1290 if (max) {
1291 dst_val = (first_val > second_val) ? first_val : second_val;
1292 } else {
1293 dst_val = (first_val < second_val) ? first_val : second_val;
1294 }
1295 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1296 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1297 }
1298 src = &src2;
1299 }
1300 dst.SetUintArray(vform, result);
1301 return dst;
1302 }
1303
1304 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1305 const LogicVRegister& src1,
1306 const LogicVRegister& src2) {
1307 return UMinMaxP(vform, dst, src1, src2, true);
1308 }
1309
1310 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1311 const LogicVRegister& src1,
1312 const LogicVRegister& src2) {
1313 return UMinMaxP(vform, dst, src1, src2, false);
1314 }
1315
1316 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1317 const LogicVRegister& src, bool max) {
1318 uint64_t dst_val = max ? 0 : UINT64_MAX;
1319 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1320 uint64_t src_val = src.Uint(vform, i);
1321 if (max) {
1322 dst_val = (src_val > dst_val) ? src_val : dst_val;
1323 } else {
1324 dst_val = (src_val < dst_val) ? src_val : dst_val;
1325 }
1326 }
1327 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1328 dst.SetUint(vform, 0, dst_val);
1329 return dst;
1330 }
1331
1332 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1333 const LogicVRegister& src) {
1334 UMinMaxV(vform, dst, src, true);
1335 return dst;
1336 }
1337
1338 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1339 const LogicVRegister& src) {
1340 UMinMaxV(vform, dst, src, false);
1341 return dst;
1342 }
1343
1344 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1345 const LogicVRegister& src, int shift) {
1346 DCHECK_GE(shift, 0);
1347 SimVRegister temp;
1348 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1349 return ushl(vform, dst, src, shiftreg);
1350 }
1351
1352 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1353 const LogicVRegister& src, int shift) {
1354 DCHECK_GE(shift, 0);
1355 SimVRegister temp1, temp2;
1356 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1357 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1358 return sshl(vform, dst, extendedreg, shiftreg);
1359 }
1360
1361 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1362 const LogicVRegister& src, int shift) {
1363 DCHECK_GE(shift, 0);
1364 SimVRegister temp1, temp2;
1365 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1366 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1367 return sshl(vform, dst, extendedreg, shiftreg);
1368 }
1369
1370 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1371 const LogicVRegister& src) {
1372 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1373 return sshll(vform, dst, src, shift);
1374 }
1375
1376 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1377 const LogicVRegister& src) {
1378 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1379 return sshll2(vform, dst, src, shift);
1380 }
1381
1382 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1383 const LogicVRegister& src, int shift) {
1384 DCHECK_GE(shift, 0);
1385 SimVRegister temp1, temp2;
1386 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1387 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1388 return ushl(vform, dst, extendedreg, shiftreg);
1389 }
1390
1391 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1392 const LogicVRegister& src, int shift) {
1393 DCHECK_GE(shift, 0);
1394 SimVRegister temp1, temp2;
1395 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1396 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1397 return ushl(vform, dst, extendedreg, shiftreg);
1398 }
1399
1400 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1401 const LogicVRegister& src, int shift) {
1402 dst.ClearForWrite(vform);
1403 int laneCount = LaneCountFromFormat(vform);
1404 for (int i = 0; i < laneCount; i++) {
1405 uint64_t src_lane = src.Uint(vform, i);
1406 uint64_t dst_lane = dst.Uint(vform, i);
1407 uint64_t shifted = src_lane << shift;
1408 uint64_t mask = MaxUintFromFormat(vform) << shift;
1409 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1410 }
1411 return dst;
1412 }
1413
1414 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1415 const LogicVRegister& src, int shift) {
1416 DCHECK_GE(shift, 0);
1417 SimVRegister temp;
1418 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1419 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1420 }
1421
1422 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1423 const LogicVRegister& src, int shift) {
1424 DCHECK_GE(shift, 0);
1425 SimVRegister temp;
1426 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1427 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1428 }
1429
1430 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1431 const LogicVRegister& src, int shift) {
1432 DCHECK_GE(shift, 0);
1433 SimVRegister temp;
1434 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1435 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1436 }
1437
1438 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1439 const LogicVRegister& src, int shift) {
1440 dst.ClearForWrite(vform);
1441 int laneCount = LaneCountFromFormat(vform);
1442 DCHECK((shift > 0) &&
1443 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1444 for (int i = 0; i < laneCount; i++) {
1445 uint64_t src_lane = src.Uint(vform, i);
1446 uint64_t dst_lane = dst.Uint(vform, i);
1447 uint64_t shifted;
1448 uint64_t mask;
1449 if (shift == 64) {
1450 shifted = 0;
1451 mask = 0;
1452 } else {
1453 shifted = src_lane >> shift;
1454 mask = MaxUintFromFormat(vform) >> shift;
1455 }
1456 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1457 }
1458 return dst;
1459 }
1460
1461 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1462 const LogicVRegister& src, int shift) {
1463 DCHECK_GE(shift, 0);
1464 SimVRegister temp;
1465 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1466 return ushl(vform, dst, src, shiftreg);
1467 }
1468
1469 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1470 const LogicVRegister& src, int shift) {
1471 DCHECK_GE(shift, 0);
1472 SimVRegister temp;
1473 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1474 return sshl(vform, dst, src, shiftreg);
1475 }
1476
1477 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1478 const LogicVRegister& src, int shift) {
1479 SimVRegister temp;
1480 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1481 return add(vform, dst, dst, shifted_reg);
1482 }
1483
1484 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1485 const LogicVRegister& src, int shift) {
1486 SimVRegister temp;
1487 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1488 return add(vform, dst, dst, shifted_reg);
1489 }
1490
1491 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1492 const LogicVRegister& src, int shift) {
1493 SimVRegister temp;
1494 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1495 return add(vform, dst, dst, shifted_reg);
1496 }
1497
1498 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1499 const LogicVRegister& src, int shift) {
1500 SimVRegister temp;
1501 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1502 return add(vform, dst, dst, shifted_reg);
1503 }
1504
1505 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1506 const LogicVRegister& src) {
1507 uint64_t result[16];
1508 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1509 int laneCount = LaneCountFromFormat(vform);
1510 for (int i = 0; i < laneCount; i++) {
1511 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1512 }
1513
1514 dst.SetUintArray(vform, result);
1515 return dst;
1516 }
1517
1518 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1519 const LogicVRegister& src) {
1520 uint64_t result[16];
1521 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1522 int laneCount = LaneCountFromFormat(vform);
1523 for (int i = 0; i < laneCount; i++) {
1524 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1525 }
1526
1527 dst.SetUintArray(vform, result);
1528 return dst;
1529 }
1530
1531 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1532 const LogicVRegister& src) {
1533 uint64_t result[16];
1534 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1535 int laneCount = LaneCountFromFormat(vform);
1536 for (int i = 0; i < laneCount; i++) {
1537 uint64_t value = src.Uint(vform, i);
1538 result[i] = 0;
1539 for (int j = 0; j < laneSizeInBits; j++) {
1540 result[i] += (value & 1);
1541 value >>= 1;
1542 }
1543 }
1544
1545 dst.SetUintArray(vform, result);
1546 return dst;
1547 }
1548
1549 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1550 const LogicVRegister& src1,
1551 const LogicVRegister& src2) {
1552 dst.ClearForWrite(vform);
1553 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1554 int8_t shift_val = src2.Int(vform, i);
1555 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1556
1557 // Set signed saturation state.
1558 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1559 (lj_src_val != 0)) {
1560 dst.SetSignedSat(i, lj_src_val >= 0);
1561 }
1562
1563 // Set unsigned saturation state.
1564 if (lj_src_val < 0) {
1565 dst.SetUnsignedSat(i, false);
1566 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1567 (lj_src_val != 0)) {
1568 dst.SetUnsignedSat(i, true);
1569 }
1570
1571 int64_t src_val = src1.Int(vform, i);
1572 bool src_is_negative = src_val < 0;
1573 if (shift_val > 63) {
1574 dst.SetInt(vform, i, 0);
1575 } else if (shift_val < -63) {
1576 dst.SetRounding(i, src_is_negative);
1577 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1578 } else {
1579 // Use unsigned types for shifts, as behaviour is undefined for signed
1580 // lhs.
1581 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1582
1583 if (shift_val < 0) {
1584 // Convert to right shift.
1585 shift_val = -shift_val;
1586
1587 // Set rounding state by testing most-significant bit shifted out.
1588 // Rounding only needed on right shifts.
1589 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1590 dst.SetRounding(i, true);
1591 }
1592
1593 usrc_val >>= shift_val;
1594
1595 if (src_is_negative) {
1596 // Simulate sign-extension.
1597 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1598 }
1599 } else {
1600 usrc_val <<= shift_val;
1601 }
1602 dst.SetUint(vform, i, usrc_val);
1603 }
1604 }
1605 return dst;
1606 }
1607
1608 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1609 const LogicVRegister& src1,
1610 const LogicVRegister& src2) {
1611 dst.ClearForWrite(vform);
1612 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1613 int8_t shift_val = src2.Int(vform, i);
1614 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1615
1616 // Set saturation state.
1617 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1618 dst.SetUnsignedSat(i, true);
1619 }
1620
1621 uint64_t src_val = src1.Uint(vform, i);
1622 if ((shift_val > 63) || (shift_val < -64)) {
1623 dst.SetUint(vform, i, 0);
1624 } else {
1625 if (shift_val < 0) {
1626 // Set rounding state. Rounding only needed on right shifts.
1627 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1628 dst.SetRounding(i, true);
1629 }
1630
1631 if (shift_val == -64) {
1632 src_val = 0;
1633 } else {
1634 src_val >>= -shift_val;
1635 }
1636 } else {
1637 src_val <<= shift_val;
1638 }
1639 dst.SetUint(vform, i, src_val);
1640 }
1641 }
1642 return dst;
1643 }
1644
1645 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1646 const LogicVRegister& src) {
1647 dst.ClearForWrite(vform);
1648 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1649 // Test for signed saturation.
1650 int64_t sa = src.Int(vform, i);
1651 if (sa == MinIntFromFormat(vform)) {
1652 dst.SetSignedSat(i, true);
1653 }
1654 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1655 }
1656 return dst;
1657 }
1658
1659 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1660 const LogicVRegister& src) {
1661 dst.ClearForWrite(vform);
1662 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1663 int64_t sa = dst.IntLeftJustified(vform, i);
1664 uint64_t ub = src.UintLeftJustified(vform, i);
1665 uint64_t ur = sa + ub;
1666
1667 int64_t sr = bit_cast<int64_t>(ur);
1668 if (sr < sa) { // Test for signed positive saturation.
1669 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1670 } else {
1671 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1672 }
1673 }
1674 return dst;
1675 }
1676
1677 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1678 const LogicVRegister& src) {
1679 dst.ClearForWrite(vform);
1680 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1681 uint64_t ua = dst.UintLeftJustified(vform, i);
1682 int64_t sb = src.IntLeftJustified(vform, i);
1683 uint64_t ur = ua + sb;
1684
1685 if ((sb > 0) && (ur <= ua)) {
1686 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1687 } else if ((sb < 0) && (ur >= ua)) {
1688 dst.SetUint(vform, i, 0); // Negative saturation.
1689 } else {
1690 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1691 }
1692 }
1693 return dst;
1694 }
1695
1696 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1697 const LogicVRegister& src) {
1698 dst.ClearForWrite(vform);
1699 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1700 // Test for signed saturation.
1701 int64_t sa = src.Int(vform, i);
1702 if (sa == MinIntFromFormat(vform)) {
1703 dst.SetSignedSat(i, true);
1704 }
1705 if (sa < 0) {
1706 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1707 } else {
1708 dst.SetInt(vform, i, sa);
1709 }
1710 }
1711 return dst;
1712 }
1713
1714 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1715 LogicVRegister dst, bool dstIsSigned,
1716 const LogicVRegister& src,
1717 bool srcIsSigned) {
1718 bool upperhalf = false;
1719 VectorFormat srcform = kFormatUndefined;
1720 int64_t ssrc[8];
1721 uint64_t usrc[8];
1722
1723 switch (dstform) {
1724 case kFormat8B:
1725 upperhalf = false;
1726 srcform = kFormat8H;
1727 break;
1728 case kFormat16B:
1729 upperhalf = true;
1730 srcform = kFormat8H;
1731 break;
1732 case kFormat4H:
1733 upperhalf = false;
1734 srcform = kFormat4S;
1735 break;
1736 case kFormat8H:
1737 upperhalf = true;
1738 srcform = kFormat4S;
1739 break;
1740 case kFormat2S:
1741 upperhalf = false;
1742 srcform = kFormat2D;
1743 break;
1744 case kFormat4S:
1745 upperhalf = true;
1746 srcform = kFormat2D;
1747 break;
1748 case kFormatB:
1749 upperhalf = false;
1750 srcform = kFormatH;
1751 break;
1752 case kFormatH:
1753 upperhalf = false;
1754 srcform = kFormatS;
1755 break;
1756 case kFormatS:
1757 upperhalf = false;
1758 srcform = kFormatD;
1759 break;
1760 default:
1761 UNIMPLEMENTED();
1762 }
1763
1764 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1765 ssrc[i] = src.Int(srcform, i);
1766 usrc[i] = src.Uint(srcform, i);
1767 }
1768
1769 int offset;
1770 if (upperhalf) {
1771 offset = LaneCountFromFormat(dstform) / 2;
1772 } else {
1773 offset = 0;
1774 dst.ClearForWrite(dstform);
1775 }
1776
1777 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1778 // Test for signed saturation
1779 if (ssrc[i] > MaxIntFromFormat(dstform)) {
1780 dst.SetSignedSat(offset + i, true);
1781 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1782 dst.SetSignedSat(offset + i, false);
1783 }
1784
1785 // Test for unsigned saturation
1786 if (srcIsSigned) {
1787 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1788 dst.SetUnsignedSat(offset + i, true);
1789 } else if (ssrc[i] < 0) {
1790 dst.SetUnsignedSat(offset + i, false);
1791 }
1792 } else {
1793 if (usrc[i] > MaxUintFromFormat(dstform)) {
1794 dst.SetUnsignedSat(offset + i, true);
1795 }
1796 }
1797
1798 int64_t result;
1799 if (srcIsSigned) {
1800 result = ssrc[i] & MaxUintFromFormat(dstform);
1801 } else {
1802 result = usrc[i] & MaxUintFromFormat(dstform);
1803 }
1804
1805 if (dstIsSigned) {
1806 dst.SetInt(dstform, offset + i, result);
1807 } else {
1808 dst.SetUint(dstform, offset + i, result);
1809 }
1810 }
1811 return dst;
1812 }
1813
1814 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1815 const LogicVRegister& src) {
1816 return ExtractNarrow(vform, dst, true, src, true);
1817 }
1818
1819 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1820 const LogicVRegister& src) {
1821 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1822 }
1823
1824 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1825 const LogicVRegister& src) {
1826 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1827 }
1828
1829 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1830 const LogicVRegister& src) {
1831 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1832 }
1833
1834 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1835 const LogicVRegister& src1,
1836 const LogicVRegister& src2, bool issigned) {
1837 dst.ClearForWrite(vform);
1838 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1839 if (issigned) {
1840 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1841 sr = sr > 0 ? sr : -sr;
1842 dst.SetInt(vform, i, sr);
1843 } else {
1844 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1845 sr = sr > 0 ? sr : -sr;
1846 dst.SetUint(vform, i, sr);
1847 }
1848 }
1849 return dst;
1850 }
1851
1852 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1853 const LogicVRegister& src1,
1854 const LogicVRegister& src2) {
1855 SimVRegister temp;
1856 dst.ClearForWrite(vform);
1857 AbsDiff(vform, temp, src1, src2, true);
1858 add(vform, dst, dst, temp);
1859 return dst;
1860 }
1861
1862 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1863 const LogicVRegister& src1,
1864 const LogicVRegister& src2) {
1865 SimVRegister temp;
1866 dst.ClearForWrite(vform);
1867 AbsDiff(vform, temp, src1, src2, false);
1868 add(vform, dst, dst, temp);
1869 return dst;
1870 }
1871
1872 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1873 const LogicVRegister& src) {
1874 dst.ClearForWrite(vform);
1875 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1876 dst.SetUint(vform, i, ~src.Uint(vform, i));
1877 }
1878 return dst;
1879 }
1880
1881 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1882 const LogicVRegister& src) {
1883 uint64_t result[16];
1884 int laneCount = LaneCountFromFormat(vform);
1885 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1886 uint64_t reversed_value;
1887 uint64_t value;
1888 for (int i = 0; i < laneCount; i++) {
1889 value = src.Uint(vform, i);
1890 reversed_value = 0;
1891 for (int j = 0; j < laneSizeInBits; j++) {
1892 reversed_value = (reversed_value << 1) | (value & 1);
1893 value >>= 1;
1894 }
1895 result[i] = reversed_value;
1896 }
1897
1898 dst.SetUintArray(vform, result);
1899 return dst;
1900 }
1901
1902 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1903 const LogicVRegister& src, int revSize) {
1904 uint64_t result[16];
1905 int laneCount = LaneCountFromFormat(vform);
1906 int laneSize = LaneSizeInBytesFromFormat(vform);
1907 int lanesPerLoop = revSize / laneSize;
1908 for (int i = 0; i < laneCount; i += lanesPerLoop) {
1909 for (int j = 0; j < lanesPerLoop; j++) {
1910 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1911 }
1912 }
1913 dst.SetUintArray(vform, result);
1914 return dst;
1915 }
1916
1917 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1918 const LogicVRegister& src) {
1919 return rev(vform, dst, src, 2);
1920 }
1921
1922 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1923 const LogicVRegister& src) {
1924 return rev(vform, dst, src, 4);
1925 }
1926
1927 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1928 const LogicVRegister& src) {
1929 return rev(vform, dst, src, 8);
1930 }
1931
1932 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1933 const LogicVRegister& src, bool is_signed,
1934 bool do_accumulate) {
1935 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1936 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1937 DCHECK_LE(LaneCountFromFormat(vform), 8);
1938
1939 uint64_t result[8];
1940 int lane_count = LaneCountFromFormat(vform);
1941 for (int i = 0; i < lane_count; i++) {
1942 if (is_signed) {
1943 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1944 src.Int(vformsrc, 2 * i + 1));
1945 } else {
1946 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1947 }
1948 }
1949
1950 dst.ClearForWrite(vform);
1951 for (int i = 0; i < lane_count; ++i) {
1952 if (do_accumulate) {
1953 result[i] += dst.Uint(vform, i);
1954 }
1955 dst.SetUint(vform, i, result[i]);
1956 }
1957
1958 return dst;
1959 }
1960
1961 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1962 const LogicVRegister& src) {
1963 return addlp(vform, dst, src, true, false);
1964 }
1965
1966 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1967 const LogicVRegister& src) {
1968 return addlp(vform, dst, src, false, false);
1969 }
1970
1971 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1972 const LogicVRegister& src) {
1973 return addlp(vform, dst, src, true, true);
1974 }
1975
1976 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1977 const LogicVRegister& src) {
1978 return addlp(vform, dst, src, false, true);
1979 }
1980
1981 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1982 const LogicVRegister& src1,
1983 const LogicVRegister& src2, int index) {
1984 uint8_t result[16];
1985 int laneCount = LaneCountFromFormat(vform);
1986 for (int i = 0; i < laneCount - index; ++i) {
1987 result[i] = src1.Uint(vform, i + index);
1988 }
1989 for (int i = 0; i < index; ++i) {
1990 result[laneCount - index + i] = src2.Uint(vform, i);
1991 }
1992 dst.ClearForWrite(vform);
1993 for (int i = 0; i < laneCount; ++i) {
1994 dst.SetUint(vform, i, result[i]);
1995 }
1996 return dst;
1997 }
1998
1999 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
2000 const LogicVRegister& src,
2001 int src_index) {
2002 int laneCount = LaneCountFromFormat(vform);
2003 uint64_t value = src.Uint(vform, src_index);
2004 dst.ClearForWrite(vform);
2005 for (int i = 0; i < laneCount; ++i) {
2006 dst.SetUint(vform, i, value);
2007 }
2008 return dst;
2009 }
2010
2011 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2012 uint64_t imm) {
2013 int laneCount = LaneCountFromFormat(vform);
2014 uint64_t value = imm & MaxUintFromFormat(vform);
2015 dst.ClearForWrite(vform);
2016 for (int i = 0; i < laneCount; ++i) {
2017 dst.SetUint(vform, i, value);
2018 }
2019 return dst;
2020 }
2021
2022 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2023 int dst_index, const LogicVRegister& src,
2024 int src_index) {
2025 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2026 return dst;
2027 }
2028
2029 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2030 int dst_index, uint64_t imm) {
2031 uint64_t value = imm & MaxUintFromFormat(vform);
2032 dst.SetUint(vform, dst_index, value);
2033 return dst;
2034 }
2035
2036 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2037 uint64_t imm) {
2038 int laneCount = LaneCountFromFormat(vform);
2039 dst.ClearForWrite(vform);
2040 for (int i = 0; i < laneCount; ++i) {
2041 dst.SetUint(vform, i, imm);
2042 }
2043 return dst;
2044 }
2045
2046 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2047 uint64_t imm) {
2048 int laneCount = LaneCountFromFormat(vform);
2049 dst.ClearForWrite(vform);
2050 for (int i = 0; i < laneCount; ++i) {
2051 dst.SetUint(vform, i, ~imm);
2052 }
2053 return dst;
2054 }
2055
2056 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2057 const LogicVRegister& src, uint64_t imm) {
2058 uint64_t result[16];
2059 int laneCount = LaneCountFromFormat(vform);
2060 for (int i = 0; i < laneCount; ++i) {
2061 result[i] = src.Uint(vform, i) | imm;
2062 }
2063 dst.SetUintArray(vform, result);
2064 return dst;
2065 }
2066
2067 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2068 const LogicVRegister& src) {
2069 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2070
2071 dst.ClearForWrite(vform);
2072 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2073 dst.SetUint(vform, i, src.Uint(vform_half, i));
2074 }
2075 return dst;
2076 }
2077
2078 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2079 const LogicVRegister& src) {
2080 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2081
2082 dst.ClearForWrite(vform);
2083 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2084 dst.SetInt(vform, i, src.Int(vform_half, i));
2085 }
2086 return dst;
2087 }
2088
2089 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2090 const LogicVRegister& src) {
2091 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2092 int lane_count = LaneCountFromFormat(vform);
2093
2094 dst.ClearForWrite(vform);
2095 for (int i = 0; i < lane_count; i++) {
2096 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2097 }
2098 return dst;
2099 }
2100
2101 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2102 const LogicVRegister& src) {
2103 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2104 int lane_count = LaneCountFromFormat(vform);
2105
2106 dst.ClearForWrite(vform);
2107 for (int i = 0; i < lane_count; i++) {
2108 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2109 }
2110 return dst;
2111 }
2112
2113 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2114 const LogicVRegister& src, int shift) {
2115 SimVRegister temp;
2116 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2117 VectorFormat vform_dst = vform;
2118 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2119 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2120 }
2121
2122 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2123 const LogicVRegister& src, int shift) {
2124 SimVRegister temp;
2125 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2126 VectorFormat vformdst = vform;
2127 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2128 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2129 }
2130
2131 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2132 const LogicVRegister& src, int shift) {
2133 SimVRegister temp;
2134 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2135 VectorFormat vformdst = vform;
2136 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2137 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2138 }
2139
2140 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2141 const LogicVRegister& src, int shift) {
2142 SimVRegister temp;
2143 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2144 VectorFormat vformdst = vform;
2145 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2146 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2147 }
2148
2149 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2150 const LogicVRegister& ind,
2151 bool zero_out_of_bounds,
2152 const LogicVRegister* tab1,
2153 const LogicVRegister* tab2,
2154 const LogicVRegister* tab3,
2155 const LogicVRegister* tab4) {
2156 DCHECK_NOT_NULL(tab1);
2157 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2158 uint64_t result[kMaxLanesPerVector];
2159 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2160 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2161 }
2162 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2163 uint64_t j = ind.Uint(vform, i);
2164 int tab_idx = static_cast<int>(j >> 4);
2165 int j_idx = static_cast<int>(j & 15);
2166 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2167 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2168 }
2169 }
2170 dst.SetUintArray(vform, result);
2171 return dst;
2172 }
2173
2174 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2175 const LogicVRegister& tab,
2176 const LogicVRegister& ind) {
2177 return Table(vform, dst, ind, true, &tab);
2178 }
2179
2180 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2181 const LogicVRegister& tab,
2182 const LogicVRegister& tab2,
2183 const LogicVRegister& ind) {
2184 return Table(vform, dst, ind, true, &tab, &tab2);
2185 }
2186
2187 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2188 const LogicVRegister& tab,
2189 const LogicVRegister& tab2,
2190 const LogicVRegister& tab3,
2191 const LogicVRegister& ind) {
2192 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2193 }
2194
2195 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2196 const LogicVRegister& tab,
2197 const LogicVRegister& tab2,
2198 const LogicVRegister& tab3,
2199 const LogicVRegister& tab4,
2200 const LogicVRegister& ind) {
2201 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2202 }
2203
2204 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2205 const LogicVRegister& tab,
2206 const LogicVRegister& ind) {
2207 return Table(vform, dst, ind, false, &tab);
2208 }
2209
2210 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2211 const LogicVRegister& tab,
2212 const LogicVRegister& tab2,
2213 const LogicVRegister& ind) {
2214 return Table(vform, dst, ind, false, &tab, &tab2);
2215 }
2216
2217 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2218 const LogicVRegister& tab,
2219 const LogicVRegister& tab2,
2220 const LogicVRegister& tab3,
2221 const LogicVRegister& ind) {
2222 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2223 }
2224
2225 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2226 const LogicVRegister& tab,
2227 const LogicVRegister& tab2,
2228 const LogicVRegister& tab3,
2229 const LogicVRegister& tab4,
2230 const LogicVRegister& ind) {
2231 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2232 }
2233
2234 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2235 const LogicVRegister& src, int shift) {
2236 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2237 }
2238
2239 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2240 const LogicVRegister& src, int shift) {
2241 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2242 }
2243
2244 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2245 const LogicVRegister& src, int shift) {
2246 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2247 }
2248
2249 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2250 const LogicVRegister& src, int shift) {
2251 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2252 }
2253
2254 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2255 const LogicVRegister& src, int shift) {
2256 SimVRegister temp;
2257 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2258 VectorFormat vformdst = vform;
2259 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2260 return sqxtn(vformdst, dst, shifted_src);
2261 }
2262
2263 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2264 const LogicVRegister& src, int shift) {
2265 SimVRegister temp;
2266 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2267 VectorFormat vformdst = vform;
2268 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2269 return sqxtn(vformdst, dst, shifted_src);
2270 }
2271
2272 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2273 const LogicVRegister& src, int shift) {
2274 SimVRegister temp;
2275 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2276 VectorFormat vformdst = vform;
2277 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2278 return sqxtn(vformdst, dst, shifted_src);
2279 }
2280
2281 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2282 const LogicVRegister& src, int shift) {
2283 SimVRegister temp;
2284 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2285 VectorFormat vformdst = vform;
2286 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2287 return sqxtn(vformdst, dst, shifted_src);
2288 }
2289
2290 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2291 const LogicVRegister& src, int shift) {
2292 SimVRegister temp;
2293 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2294 VectorFormat vformdst = vform;
2295 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2296 return sqxtun(vformdst, dst, shifted_src);
2297 }
2298
2299 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2300 const LogicVRegister& src, int shift) {
2301 SimVRegister temp;
2302 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2303 VectorFormat vformdst = vform;
2304 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2305 return sqxtun(vformdst, dst, shifted_src);
2306 }
2307
2308 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2309 const LogicVRegister& src, int shift) {
2310 SimVRegister temp;
2311 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2312 VectorFormat vformdst = vform;
2313 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2314 return sqxtun(vformdst, dst, shifted_src);
2315 }
2316
2317 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2318 const LogicVRegister& src, int shift) {
2319 SimVRegister temp;
2320 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2321 VectorFormat vformdst = vform;
2322 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2323 return sqxtun(vformdst, dst, shifted_src);
2324 }
2325
2326 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2327 const LogicVRegister& src1,
2328 const LogicVRegister& src2) {
2329 SimVRegister temp1, temp2;
2330 uxtl(vform, temp1, src1);
2331 uxtl(vform, temp2, src2);
2332 add(vform, dst, temp1, temp2);
2333 return dst;
2334 }
2335
2336 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2337 const LogicVRegister& src1,
2338 const LogicVRegister& src2) {
2339 SimVRegister temp1, temp2;
2340 uxtl2(vform, temp1, src1);
2341 uxtl2(vform, temp2, src2);
2342 add(vform, dst, temp1, temp2);
2343 return dst;
2344 }
2345
2346 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2347 const LogicVRegister& src1,
2348 const LogicVRegister& src2) {
2349 SimVRegister temp;
2350 uxtl(vform, temp, src2);
2351 add(vform, dst, src1, temp);
2352 return dst;
2353 }
2354
2355 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2356 const LogicVRegister& src1,
2357 const LogicVRegister& src2) {
2358 SimVRegister temp;
2359 uxtl2(vform, temp, src2);
2360 add(vform, dst, src1, temp);
2361 return dst;
2362 }
2363
2364 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2365 const LogicVRegister& src1,
2366 const LogicVRegister& src2) {
2367 SimVRegister temp1, temp2;
2368 sxtl(vform, temp1, src1);
2369 sxtl(vform, temp2, src2);
2370 add(vform, dst, temp1, temp2);
2371 return dst;
2372 }
2373
2374 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2375 const LogicVRegister& src1,
2376 const LogicVRegister& src2) {
2377 SimVRegister temp1, temp2;
2378 sxtl2(vform, temp1, src1);
2379 sxtl2(vform, temp2, src2);
2380 add(vform, dst, temp1, temp2);
2381 return dst;
2382 }
2383
2384 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2385 const LogicVRegister& src1,
2386 const LogicVRegister& src2) {
2387 SimVRegister temp;
2388 sxtl(vform, temp, src2);
2389 add(vform, dst, src1, temp);
2390 return dst;
2391 }
2392
2393 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2394 const LogicVRegister& src1,
2395 const LogicVRegister& src2) {
2396 SimVRegister temp;
2397 sxtl2(vform, temp, src2);
2398 add(vform, dst, src1, temp);
2399 return dst;
2400 }
2401
2402 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2403 const LogicVRegister& src1,
2404 const LogicVRegister& src2) {
2405 SimVRegister temp1, temp2;
2406 uxtl(vform, temp1, src1);
2407 uxtl(vform, temp2, src2);
2408 sub(vform, dst, temp1, temp2);
2409 return dst;
2410 }
2411
2412 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2413 const LogicVRegister& src1,
2414 const LogicVRegister& src2) {
2415 SimVRegister temp1, temp2;
2416 uxtl2(vform, temp1, src1);
2417 uxtl2(vform, temp2, src2);
2418 sub(vform, dst, temp1, temp2);
2419 return dst;
2420 }
2421
2422 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2423 const LogicVRegister& src1,
2424 const LogicVRegister& src2) {
2425 SimVRegister temp;
2426 uxtl(vform, temp, src2);
2427 sub(vform, dst, src1, temp);
2428 return dst;
2429 }
2430
2431 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2432 const LogicVRegister& src1,
2433 const LogicVRegister& src2) {
2434 SimVRegister temp;
2435 uxtl2(vform, temp, src2);
2436 sub(vform, dst, src1, temp);
2437 return dst;
2438 }
2439
2440 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2441 const LogicVRegister& src1,
2442 const LogicVRegister& src2) {
2443 SimVRegister temp1, temp2;
2444 sxtl(vform, temp1, src1);
2445 sxtl(vform, temp2, src2);
2446 sub(vform, dst, temp1, temp2);
2447 return dst;
2448 }
2449
2450 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2451 const LogicVRegister& src1,
2452 const LogicVRegister& src2) {
2453 SimVRegister temp1, temp2;
2454 sxtl2(vform, temp1, src1);
2455 sxtl2(vform, temp2, src2);
2456 sub(vform, dst, temp1, temp2);
2457 return dst;
2458 }
2459
2460 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2461 const LogicVRegister& src1,
2462 const LogicVRegister& src2) {
2463 SimVRegister temp;
2464 sxtl(vform, temp, src2);
2465 sub(vform, dst, src1, temp);
2466 return dst;
2467 }
2468
2469 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2470 const LogicVRegister& src1,
2471 const LogicVRegister& src2) {
2472 SimVRegister temp;
2473 sxtl2(vform, temp, src2);
2474 sub(vform, dst, src1, temp);
2475 return dst;
2476 }
2477
2478 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2479 const LogicVRegister& src1,
2480 const LogicVRegister& src2) {
2481 SimVRegister temp1, temp2;
2482 uxtl(vform, temp1, src1);
2483 uxtl(vform, temp2, src2);
2484 uaba(vform, dst, temp1, temp2);
2485 return dst;
2486 }
2487
2488 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2489 const LogicVRegister& src1,
2490 const LogicVRegister& src2) {
2491 SimVRegister temp1, temp2;
2492 uxtl2(vform, temp1, src1);
2493 uxtl2(vform, temp2, src2);
2494 uaba(vform, dst, temp1, temp2);
2495 return dst;
2496 }
2497
2498 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2499 const LogicVRegister& src1,
2500 const LogicVRegister& src2) {
2501 SimVRegister temp1, temp2;
2502 sxtl(vform, temp1, src1);
2503 sxtl(vform, temp2, src2);
2504 saba(vform, dst, temp1, temp2);
2505 return dst;
2506 }
2507
2508 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2509 const LogicVRegister& src1,
2510 const LogicVRegister& src2) {
2511 SimVRegister temp1, temp2;
2512 sxtl2(vform, temp1, src1);
2513 sxtl2(vform, temp2, src2);
2514 saba(vform, dst, temp1, temp2);
2515 return dst;
2516 }
2517
2518 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2519 const LogicVRegister& src1,
2520 const LogicVRegister& src2) {
2521 SimVRegister temp1, temp2;
2522 uxtl(vform, temp1, src1);
2523 uxtl(vform, temp2, src2);
2524 AbsDiff(vform, dst, temp1, temp2, false);
2525 return dst;
2526 }
2527
2528 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2529 const LogicVRegister& src1,
2530 const LogicVRegister& src2) {
2531 SimVRegister temp1, temp2;
2532 uxtl2(vform, temp1, src1);
2533 uxtl2(vform, temp2, src2);
2534 AbsDiff(vform, dst, temp1, temp2, false);
2535 return dst;
2536 }
2537
2538 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2539 const LogicVRegister& src1,
2540 const LogicVRegister& src2) {
2541 SimVRegister temp1, temp2;
2542 sxtl(vform, temp1, src1);
2543 sxtl(vform, temp2, src2);
2544 AbsDiff(vform, dst, temp1, temp2, true);
2545 return dst;
2546 }
2547
2548 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2549 const LogicVRegister& src1,
2550 const LogicVRegister& src2) {
2551 SimVRegister temp1, temp2;
2552 sxtl2(vform, temp1, src1);
2553 sxtl2(vform, temp2, src2);
2554 AbsDiff(vform, dst, temp1, temp2, true);
2555 return dst;
2556 }
2557
2558 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2559 const LogicVRegister& src1,
2560 const LogicVRegister& src2) {
2561 SimVRegister temp1, temp2;
2562 uxtl(vform, temp1, src1);
2563 uxtl(vform, temp2, src2);
2564 mul(vform, dst, temp1, temp2);
2565 return dst;
2566 }
2567
2568 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2569 const LogicVRegister& src1,
2570 const LogicVRegister& src2) {
2571 SimVRegister temp1, temp2;
2572 uxtl2(vform, temp1, src1);
2573 uxtl2(vform, temp2, src2);
2574 mul(vform, dst, temp1, temp2);
2575 return dst;
2576 }
2577
2578 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2579 const LogicVRegister& src1,
2580 const LogicVRegister& src2) {
2581 SimVRegister temp1, temp2;
2582 sxtl(vform, temp1, src1);
2583 sxtl(vform, temp2, src2);
2584 mul(vform, dst, temp1, temp2);
2585 return dst;
2586 }
2587
2588 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2589 const LogicVRegister& src1,
2590 const LogicVRegister& src2) {
2591 SimVRegister temp1, temp2;
2592 sxtl2(vform, temp1, src1);
2593 sxtl2(vform, temp2, src2);
2594 mul(vform, dst, temp1, temp2);
2595 return dst;
2596 }
2597
2598 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2599 const LogicVRegister& src1,
2600 const LogicVRegister& src2) {
2601 SimVRegister temp1, temp2;
2602 uxtl(vform, temp1, src1);
2603 uxtl(vform, temp2, src2);
2604 mls(vform, dst, temp1, temp2);
2605 return dst;
2606 }
2607
2608 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2609 const LogicVRegister& src1,
2610 const LogicVRegister& src2) {
2611 SimVRegister temp1, temp2;
2612 uxtl2(vform, temp1, src1);
2613 uxtl2(vform, temp2, src2);
2614 mls(vform, dst, temp1, temp2);
2615 return dst;
2616 }
2617
2618 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2619 const LogicVRegister& src1,
2620 const LogicVRegister& src2) {
2621 SimVRegister temp1, temp2;
2622 sxtl(vform, temp1, src1);
2623 sxtl(vform, temp2, src2);
2624 mls(vform, dst, temp1, temp2);
2625 return dst;
2626 }
2627
2628 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2629 const LogicVRegister& src1,
2630 const LogicVRegister& src2) {
2631 SimVRegister temp1, temp2;
2632 sxtl2(vform, temp1, src1);
2633 sxtl2(vform, temp2, src2);
2634 mls(vform, dst, temp1, temp2);
2635 return dst;
2636 }
2637
2638 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2639 const LogicVRegister& src1,
2640 const LogicVRegister& src2) {
2641 SimVRegister temp1, temp2;
2642 uxtl(vform, temp1, src1);
2643 uxtl(vform, temp2, src2);
2644 mla(vform, dst, temp1, temp2);
2645 return dst;
2646 }
2647
2648 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2649 const LogicVRegister& src1,
2650 const LogicVRegister& src2) {
2651 SimVRegister temp1, temp2;
2652 uxtl2(vform, temp1, src1);
2653 uxtl2(vform, temp2, src2);
2654 mla(vform, dst, temp1, temp2);
2655 return dst;
2656 }
2657
2658 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2659 const LogicVRegister& src1,
2660 const LogicVRegister& src2) {
2661 SimVRegister temp1, temp2;
2662 sxtl(vform, temp1, src1);
2663 sxtl(vform, temp2, src2);
2664 mla(vform, dst, temp1, temp2);
2665 return dst;
2666 }
2667
2668 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2669 const LogicVRegister& src1,
2670 const LogicVRegister& src2) {
2671 SimVRegister temp1, temp2;
2672 sxtl2(vform, temp1, src1);
2673 sxtl2(vform, temp2, src2);
2674 mla(vform, dst, temp1, temp2);
2675 return dst;
2676 }
2677
2678 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2679 const LogicVRegister& src1,
2680 const LogicVRegister& src2) {
2681 SimVRegister temp;
2682 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2683 return add(vform, dst, dst, product).SignedSaturate(vform);
2684 }
2685
2686 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2687 const LogicVRegister& src1,
2688 const LogicVRegister& src2) {
2689 SimVRegister temp;
2690 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2691 return add(vform, dst, dst, product).SignedSaturate(vform);
2692 }
2693
2694 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2695 const LogicVRegister& src1,
2696 const LogicVRegister& src2) {
2697 SimVRegister temp;
2698 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2699 return sub(vform, dst, dst, product).SignedSaturate(vform);
2700 }
2701
2702 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2703 const LogicVRegister& src1,
2704 const LogicVRegister& src2) {
2705 SimVRegister temp;
2706 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2707 return sub(vform, dst, dst, product).SignedSaturate(vform);
2708 }
2709
2710 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2711 const LogicVRegister& src1,
2712 const LogicVRegister& src2) {
2713 SimVRegister temp;
2714 LogicVRegister product = smull(vform, temp, src1, src2);
2715 return add(vform, dst, product, product).SignedSaturate(vform);
2716 }
2717
2718 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2719 const LogicVRegister& src1,
2720 const LogicVRegister& src2) {
2721 SimVRegister temp;
2722 LogicVRegister product = smull2(vform, temp, src1, src2);
2723 return add(vform, dst, product, product).SignedSaturate(vform);
2724 }
2725
2726 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2727 const LogicVRegister& src1,
2728 const LogicVRegister& src2, bool round) {
2729 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2730 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2731 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2732
2733 int esize = LaneSizeInBitsFromFormat(vform);
2734 int round_const = round ? (1 << (esize - 2)) : 0;
2735 int64_t product;
2736
2737 dst.ClearForWrite(vform);
2738 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2739 product = src1.Int(vform, i) * src2.Int(vform, i);
2740 product += round_const;
2741 product = product >> (esize - 1);
2742
2743 if (product > MaxIntFromFormat(vform)) {
2744 product = MaxIntFromFormat(vform);
2745 } else if (product < MinIntFromFormat(vform)) {
2746 product = MinIntFromFormat(vform);
2747 }
2748 dst.SetInt(vform, i, product);
2749 }
2750 return dst;
2751 }
2752
2753 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2754 const LogicVRegister& src1,
2755 const LogicVRegister& src2) {
2756 return sqrdmulh(vform, dst, src1, src2, false);
2757 }
2758
2759 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2760 const LogicVRegister& src1,
2761 const LogicVRegister& src2) {
2762 SimVRegister temp;
2763 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2764 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2765 return dst;
2766 }
2767
2768 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2769 const LogicVRegister& src1,
2770 const LogicVRegister& src2) {
2771 SimVRegister temp;
2772 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2773 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2774 return dst;
2775 }
2776
2777 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2778 const LogicVRegister& src1,
2779 const LogicVRegister& src2) {
2780 SimVRegister temp;
2781 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2782 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2783 return dst;
2784 }
2785
2786 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2787 const LogicVRegister& src1,
2788 const LogicVRegister& src2) {
2789 SimVRegister temp;
2790 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2791 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2792 return dst;
2793 }
2794
2795 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2796 const LogicVRegister& src1,
2797 const LogicVRegister& src2) {
2798 SimVRegister temp;
2799 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2800 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2801 return dst;
2802 }
2803
2804 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2805 const LogicVRegister& src1,
2806 const LogicVRegister& src2) {
2807 SimVRegister temp;
2808 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2809 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2810 return dst;
2811 }
2812
2813 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2814 const LogicVRegister& src1,
2815 const LogicVRegister& src2) {
2816 SimVRegister temp;
2817 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2818 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2819 return dst;
2820 }
2821
2822 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2823 const LogicVRegister& src1,
2824 const LogicVRegister& src2) {
2825 SimVRegister temp;
2826 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2827 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2828 return dst;
2829 }
2830
2831 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2832 const LogicVRegister& src1,
2833 const LogicVRegister& src2) {
2834 uint64_t result[16];
2835 int laneCount = LaneCountFromFormat(vform);
2836 int pairs = laneCount / 2;
2837 for (int i = 0; i < pairs; ++i) {
2838 result[2 * i] = src1.Uint(vform, 2 * i);
2839 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2840 }
2841
2842 dst.SetUintArray(vform, result);
2843 return dst;
2844 }
2845
2846 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2847 const LogicVRegister& src1,
2848 const LogicVRegister& src2) {
2849 uint64_t result[16];
2850 int laneCount = LaneCountFromFormat(vform);
2851 int pairs = laneCount / 2;
2852 for (int i = 0; i < pairs; ++i) {
2853 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2854 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2855 }
2856
2857 dst.SetUintArray(vform, result);
2858 return dst;
2859 }
2860
2861 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2862 const LogicVRegister& src1,
2863 const LogicVRegister& src2) {
2864 uint64_t result[16];
2865 int laneCount = LaneCountFromFormat(vform);
2866 int pairs = laneCount / 2;
2867 for (int i = 0; i < pairs; ++i) {
2868 result[2 * i] = src1.Uint(vform, i);
2869 result[(2 * i) + 1] = src2.Uint(vform, i);
2870 }
2871
2872 dst.SetUintArray(vform, result);
2873 return dst;
2874 }
2875
2876 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2877 const LogicVRegister& src1,
2878 const LogicVRegister& src2) {
2879 uint64_t result[16];
2880 int laneCount = LaneCountFromFormat(vform);
2881 int pairs = laneCount / 2;
2882 for (int i = 0; i < pairs; ++i) {
2883 result[2 * i] = src1.Uint(vform, pairs + i);
2884 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2885 }
2886
2887 dst.SetUintArray(vform, result);
2888 return dst;
2889 }
2890
2891 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2892 const LogicVRegister& src1,
2893 const LogicVRegister& src2) {
2894 uint64_t result[32];
2895 int laneCount = LaneCountFromFormat(vform);
2896 for (int i = 0; i < laneCount; ++i) {
2897 result[i] = src1.Uint(vform, i);
2898 result[laneCount + i] = src2.Uint(vform, i);
2899 }
2900
2901 dst.ClearForWrite(vform);
2902 for (int i = 0; i < laneCount; ++i) {
2903 dst.SetUint(vform, i, result[2 * i]);
2904 }
2905 return dst;
2906 }
2907
2908 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2909 const LogicVRegister& src1,
2910 const LogicVRegister& src2) {
2911 uint64_t result[32];
2912 int laneCount = LaneCountFromFormat(vform);
2913 for (int i = 0; i < laneCount; ++i) {
2914 result[i] = src1.Uint(vform, i);
2915 result[laneCount + i] = src2.Uint(vform, i);
2916 }
2917
2918 dst.ClearForWrite(vform);
2919 for (int i = 0; i < laneCount; ++i) {
2920 dst.SetUint(vform, i, result[(2 * i) + 1]);
2921 }
2922 return dst;
2923 }
2924
2925 template <typename T>
2926 T Simulator::FPAdd(T op1, T op2) {
2927 T result = FPProcessNaNs(op1, op2);
2928 if (std::isnan(result)) return result;
2929
2930 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2931 // inf + -inf returns the default NaN.
2932 FPProcessException();
2933 return FPDefaultNaN<T>();
2934 } else {
2935 // Other cases should be handled by standard arithmetic.
2936 return op1 + op2;
2937 }
2938 }
2939
2940 template <typename T>
2941 T Simulator::FPSub(T op1, T op2) {
2942 // NaNs should be handled elsewhere.
2943 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2944
2945 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2946 // inf - inf returns the default NaN.
2947 FPProcessException();
2948 return FPDefaultNaN<T>();
2949 } else {
2950 // Other cases should be handled by standard arithmetic.
2951 return op1 - op2;
2952 }
2953 }
2954
2955 template <typename T>
2956 T Simulator::FPMul(T op1, T op2) {
2957 // NaNs should be handled elsewhere.
2958 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2959
2960 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2961 // inf * 0.0 returns the default NaN.
2962 FPProcessException();
2963 return FPDefaultNaN<T>();
2964 } else {
2965 // Other cases should be handled by standard arithmetic.
2966 return op1 * op2;
2967 }
2968 }
2969
2970 template <typename T>
2971 T Simulator::FPMulx(T op1, T op2) {
2972 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2973 // inf * 0.0 returns +/-2.0.
2974 T two = 2.0;
2975 return copysign(1.0, op1) * copysign(1.0, op2) * two;
2976 }
2977 return FPMul(op1, op2);
2978 }
2979
2980 template <typename T>
2981 T Simulator::FPMulAdd(T a, T op1, T op2) {
2982 T result = FPProcessNaNs3(a, op1, op2);
2983
2984 T sign_a = copysign(1.0, a);
2985 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
2986 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2987 bool operation_generates_nan =
2988 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
2989 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
2990 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
2991
2992 if (std::isnan(result)) {
2993 // Generated NaNs override quiet NaNs propagated from a.
2994 if (operation_generates_nan && IsQuietNaN(a)) {
2995 FPProcessException();
2996 return FPDefaultNaN<T>();
2997 } else {
2998 return result;
2999 }
3000 }
3001
3002 // If the operation would produce a NaN, return the default NaN.
3003 if (operation_generates_nan) {
3004 FPProcessException();
3005 return FPDefaultNaN<T>();
3006 }
3007
3008 // Work around broken fma implementations for exact zero results: The sign of
3009 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3010 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3011 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3012 }
3013
3014 result = FusedMultiplyAdd(op1, op2, a);
3015 DCHECK(!std::isnan(result));
3016
3017 // Work around broken fma implementations for rounded zero results: If a is
3018 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3019 if ((a == 0.0) && (result == 0.0)) {
3020 return copysign(0.0, sign_prod);
3021 }
3022
3023 return result;
3024 }
3025
3026 template <typename T>
3027 T Simulator::FPDiv(T op1, T op2) {
3028 // NaNs should be handled elsewhere.
3029 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3030
3031 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3032 // inf / inf and 0.0 / 0.0 return the default NaN.
3033 FPProcessException();
3034 return FPDefaultNaN<T>();
3035 } else {
3036 if (op2 == 0.0) {
3037 FPProcessException();
3038 if (!std::isnan(op1)) {
3039 double op1_sign = copysign(1.0, op1);
3040 double op2_sign = copysign(1.0, op2);
3041 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3042 }
3043 }
3044
3045 // Other cases should be handled by standard arithmetic.
3046 return op1 / op2;
3047 }
3048 }
3049
3050 template <typename T>
3051 T Simulator::FPSqrt(T op) {
3052 if (std::isnan(op)) {
3053 return FPProcessNaN(op);
3054 } else if (op < 0.0) {
3055 FPProcessException();
3056 return FPDefaultNaN<T>();
3057 } else {
3058 return sqrt(op);
3059 }
3060 }
3061
3062 template <typename T>
3063 T Simulator::FPMax(T a, T b) {
3064 T result = FPProcessNaNs(a, b);
3065 if (std::isnan(result)) return result;
3066
3067 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3068 // a and b are zero, and the sign differs: return +0.0.
3069 return 0.0;
3070 } else {
3071 return (a > b) ? a : b;
3072 }
3073 }
3074
3075 template <typename T>
3076 T Simulator::FPMaxNM(T a, T b) {
3077 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3078 a = kFP64NegativeInfinity;
3079 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3080 b = kFP64NegativeInfinity;
3081 }
3082
3083 T result = FPProcessNaNs(a, b);
3084 return std::isnan(result) ? result : FPMax(a, b);
3085 }
3086
3087 template <typename T>
3088 T Simulator::FPMin(T a, T b) {
3089 T result = FPProcessNaNs(a, b);
3090 if (std::isnan(result)) return result;
3091
3092 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3093 // a and b are zero, and the sign differs: return -0.0.
3094 return -0.0;
3095 } else {
3096 return (a < b) ? a : b;
3097 }
3098 }
3099
3100 template <typename T>
3101 T Simulator::FPMinNM(T a, T b) {
3102 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3103 a = kFP64PositiveInfinity;
3104 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3105 b = kFP64PositiveInfinity;
3106 }
3107
3108 T result = FPProcessNaNs(a, b);
3109 return std::isnan(result) ? result : FPMin(a, b);
3110 }
3111
3112 template <typename T>
3113 T Simulator::FPRecipStepFused(T op1, T op2) {
3114 const T two = 2.0;
3115 if ((std::isinf(op1) && (op2 == 0.0)) ||
3116 ((op1 == 0.0) && (std::isinf(op2)))) {
3117 return two;
3118 } else if (std::isinf(op1) || std::isinf(op2)) {
3119 // Return +inf if signs match, otherwise -inf.
3120 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3121 : kFP64NegativeInfinity;
3122 } else {
3123 return FusedMultiplyAdd(op1, op2, two);
3124 }
3125 }
3126
3127 template <typename T>
3128 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3129 const T one_point_five = 1.5;
3130 const T two = 2.0;
3131
3132 if ((std::isinf(op1) && (op2 == 0.0)) ||
3133 ((op1 == 0.0) && (std::isinf(op2)))) {
3134 return one_point_five;
3135 } else if (std::isinf(op1) || std::isinf(op2)) {
3136 // Return +inf if signs match, otherwise -inf.
3137 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3138 : kFP64NegativeInfinity;
3139 } else {
3140 // The multiply-add-halve operation must be fully fused, so avoid interim
3141 // rounding by checking which operand can be losslessly divided by two
3142 // before doing the multiply-add.
3143 if (std::isnormal(op1 / two)) {
3144 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3145 } else if (std::isnormal(op2 / two)) {
3146 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3147 } else {
3148 // Neither operand is normal after halving: the result is dominated by
3149 // the addition term, so just return that.
3150 return one_point_five;
3151 }
3152 }
3153 }
3154
3155 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3156 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3157 (value == kFP64NegativeInfinity)) {
3158 return value;
3159 } else if (std::isnan(value)) {
3160 return FPProcessNaN(value);
3161 }
3162
3163 double int_result = std::floor(value);
3164 double error = value - int_result;
3165 switch (round_mode) {
3166 case FPTieAway: {
3167 // Take care of correctly handling the range ]-0.5, -0.0], which must
3168 // yield -0.0.
3169 if ((-0.5 < value) && (value < 0.0)) {
3170 int_result = -0.0;
3171
3172 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3173 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3174 // result is positive, round up.
3175 int_result++;
3176 }
3177 break;
3178 }
3179 case FPTieEven: {
3180 // Take care of correctly handling the range [-0.5, -0.0], which must
3181 // yield -0.0.
3182 if ((-0.5 <= value) && (value < 0.0)) {
3183 int_result = -0.0;
3184
3185 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3186 // result is odd, round up.
3187 } else if ((error > 0.5) ||
3188 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3189 int_result++;
3190 }
3191 break;
3192 }
3193 case FPZero: {
3194 // If value>0 then we take floor(value)
3195 // otherwise, ceil(value).
3196 if (value < 0) {
3197 int_result = ceil(value);
3198 }
3199 break;
3200 }
3201 case FPNegativeInfinity: {
3202 // We always use floor(value).
3203 break;
3204 }
3205 case FPPositiveInfinity: {
3206 // Take care of correctly handling the range ]-1.0, -0.0], which must
3207 // yield -0.0.
3208 if ((-1.0 < value) && (value < 0.0)) {
3209 int_result = -0.0;
3210
3211 // If the error is non-zero, round up.
3212 } else if (error > 0.0) {
3213 int_result++;
3214 }
3215 break;
3216 }
3217 default:
3218 UNIMPLEMENTED();
3219 }
3220 return int_result;
3221 }
3222
3223 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3224 value = FPRoundInt(value, rmode);
3225 if (value >= kWMaxInt) {
3226 return kWMaxInt;
3227 } else if (value < kWMinInt) {
3228 return kWMinInt;
3229 }
3230 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3231 }
3232
3233 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3234 value = FPRoundInt(value, rmode);
3235 if (value >= kXMaxInt) {
3236 return kXMaxInt;
3237 } else if (value < kXMinInt) {
3238 return kXMinInt;
3239 }
3240 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3241 }
3242
3243 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3244 value = FPRoundInt(value, rmode);
3245 if (value >= kWMaxUInt) {
3246 return kWMaxUInt;
3247 } else if (value < 0.0) {
3248 return 0;
3249 }
3250 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3251 }
3252
3253 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3254 value = FPRoundInt(value, rmode);
3255 if (value >= kXMaxUInt) {
3256 return kXMaxUInt;
3257 } else if (value < 0.0) {
3258 return 0;
3259 }
3260 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3261 }
3262
3263 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
3264 template <typename T> \
3265 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3266 const LogicVRegister& src1, \
3267 const LogicVRegister& src2) { \
3268 dst.ClearForWrite(vform); \
3269 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
3270 T op1 = src1.Float<T>(i); \
3271 T op2 = src2.Float<T>(i); \
3272 T result; \
3273 if (PROCNAN) { \
3274 result = FPProcessNaNs(op1, op2); \
3275 if (!std::isnan(result)) { \
3276 result = OP(op1, op2); \
3277 } \
3278 } else { \
3279 result = OP(op1, op2); \
3280 } \
3281 dst.SetFloat(i, result); \
3282 } \
3283 return dst; \
3284 } \
3285 \
3286 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3287 const LogicVRegister& src1, \
3288 const LogicVRegister& src2) { \
3289 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \
3290 FN<float>(vform, dst, src1, src2); \
3291 } else { \
3292 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \
3293 FN<double>(vform, dst, src1, src2); \
3294 } \
3295 return dst; \
3296 }
3297 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3298 #undef DEFINE_NEON_FP_VECTOR_OP
3299
3300 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3301 const LogicVRegister& src1,
3302 const LogicVRegister& src2) {
3303 SimVRegister temp;
3304 LogicVRegister product = fmul(vform, temp, src1, src2);
3305 return fneg(vform, dst, product);
3306 }
3307
3308 template <typename T>
3309 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3310 const LogicVRegister& src1,
3311 const LogicVRegister& src2) {
3312 dst.ClearForWrite(vform);
3313 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3314 T op1 = -src1.Float<T>(i);
3315 T op2 = src2.Float<T>(i);
3316 T result = FPProcessNaNs(op1, op2);
3317 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3318 }
3319 return dst;
3320 }
3321
3322 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3323 const LogicVRegister& src1,
3324 const LogicVRegister& src2) {
3325 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3326 frecps<float>(vform, dst, src1, src2);
3327 } else {
3328 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3329 frecps<double>(vform, dst, src1, src2);
3330 }
3331 return dst;
3332 }
3333
3334 template <typename T>
3335 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3336 const LogicVRegister& src1,
3337 const LogicVRegister& src2) {
3338 dst.ClearForWrite(vform);
3339 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3340 T op1 = -src1.Float<T>(i);
3341 T op2 = src2.Float<T>(i);
3342 T result = FPProcessNaNs(op1, op2);
3343 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3344 }
3345 return dst;
3346 }
3347
3348 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3349 const LogicVRegister& src1,
3350 const LogicVRegister& src2) {
3351 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3352 frsqrts<float>(vform, dst, src1, src2);
3353 } else {
3354 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3355 frsqrts<double>(vform, dst, src1, src2);
3356 }
3357 return dst;
3358 }
3359
3360 template <typename T>
3361 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3362 const LogicVRegister& src1,
3363 const LogicVRegister& src2, Condition cond) {
3364 dst.ClearForWrite(vform);
3365 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3366 bool result = false;
3367 T op1 = src1.Float<T>(i);
3368 T op2 = src2.Float<T>(i);
3369 T nan_result = FPProcessNaNs(op1, op2);
3370 if (!std::isnan(nan_result)) {
3371 switch (cond) {
3372 case eq:
3373 result = (op1 == op2);
3374 break;
3375 case ge:
3376 result = (op1 >= op2);
3377 break;
3378 case gt:
3379 result = (op1 > op2);
3380 break;
3381 case le:
3382 result = (op1 <= op2);
3383 break;
3384 case lt:
3385 result = (op1 < op2);
3386 break;
3387 default:
3388 UNREACHABLE();
3389 break;
3390 }
3391 }
3392 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3393 }
3394 return dst;
3395 }
3396
3397 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3398 const LogicVRegister& src1,
3399 const LogicVRegister& src2, Condition cond) {
3400 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3401 fcmp<float>(vform, dst, src1, src2, cond);
3402 } else {
3403 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3404 fcmp<double>(vform, dst, src1, src2, cond);
3405 }
3406 return dst;
3407 }
3408
3409 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3410 const LogicVRegister& src, Condition cond) {
3411 SimVRegister temp;
3412 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3413 LogicVRegister zero_reg =
3414 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3415 fcmp<float>(vform, dst, src, zero_reg, cond);
3416 } else {
3417 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3418 LogicVRegister zero_reg =
3419 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3420 fcmp<double>(vform, dst, src, zero_reg, cond);
3421 }
3422 return dst;
3423 }
3424
3425 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3426 const LogicVRegister& src1,
3427 const LogicVRegister& src2, Condition cond) {
3428 SimVRegister temp1, temp2;
3429 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3430 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3431 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3432 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3433 } else {
3434 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3435 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3436 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3437 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3438 }
3439 return dst;
3440 }
3441
3442 template <typename T>
3443 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3444 const LogicVRegister& src1,
3445 const LogicVRegister& src2) {
3446 dst.ClearForWrite(vform);
3447 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3448 T op1 = src1.Float<T>(i);
3449 T op2 = src2.Float<T>(i);
3450 T acc = dst.Float<T>(i);
3451 T result = FPMulAdd(acc, op1, op2);
3452 dst.SetFloat(i, result);
3453 }
3454 return dst;
3455 }
3456
3457 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3458 const LogicVRegister& src1,
3459 const LogicVRegister& src2) {
3460 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3461 fmla<float>(vform, dst, src1, src2);
3462 } else {
3463 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3464 fmla<double>(vform, dst, src1, src2);
3465 }
3466 return dst;
3467 }
3468
3469 template <typename T>
3470 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3471 const LogicVRegister& src1,
3472 const LogicVRegister& src2) {
3473 dst.ClearForWrite(vform);
3474 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3475 T op1 = -src1.Float<T>(i);
3476 T op2 = src2.Float<T>(i);
3477 T acc = dst.Float<T>(i);
3478 T result = FPMulAdd(acc, op1, op2);
3479 dst.SetFloat(i, result);
3480 }
3481 return dst;
3482 }
3483
3484 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3485 const LogicVRegister& src1,
3486 const LogicVRegister& src2) {
3487 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3488 fmls<float>(vform, dst, src1, src2);
3489 } else {
3490 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3491 fmls<double>(vform, dst, src1, src2);
3492 }
3493 return dst;
3494 }
3495
3496 template <typename T>
3497 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3498 const LogicVRegister& src) {
3499 dst.ClearForWrite(vform);
3500 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3501 T op = src.Float<T>(i);
3502 op = -op;
3503 dst.SetFloat(i, op);
3504 }
3505 return dst;
3506 }
3507
3508 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3509 const LogicVRegister& src) {
3510 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3511 fneg<float>(vform, dst, src);
3512 } else {
3513 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3514 fneg<double>(vform, dst, src);
3515 }
3516 return dst;
3517 }
3518
3519 template <typename T>
3520 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3521 const LogicVRegister& src) {
3522 dst.ClearForWrite(vform);
3523 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3524 T op = src.Float<T>(i);
3525 if (copysign(1.0, op) < 0.0) {
3526 op = -op;
3527 }
3528 dst.SetFloat(i, op);
3529 }
3530 return dst;
3531 }
3532
3533 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3534 const LogicVRegister& src) {
3535 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3536 fabs_<float>(vform, dst, src);
3537 } else {
3538 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3539 fabs_<double>(vform, dst, src);
3540 }
3541 return dst;
3542 }
3543
3544 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3545 const LogicVRegister& src1,
3546 const LogicVRegister& src2) {
3547 SimVRegister temp;
3548 fsub(vform, temp, src1, src2);
3549 fabs_(vform, dst, temp);
3550 return dst;
3551 }
3552
3553 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3554 const LogicVRegister& src) {
3555 dst.ClearForWrite(vform);
3556 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3557 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3558 float result = FPSqrt(src.Float<float>(i));
3559 dst.SetFloat(i, result);
3560 }
3561 } else {
3562 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3563 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3564 double result = FPSqrt(src.Float<double>(i));
3565 dst.SetFloat(i, result);
3566 }
3567 }
3568 return dst;
3569 }
3570
3571 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
3572 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3573 const LogicVRegister& src1, \
3574 const LogicVRegister& src2) { \
3575 SimVRegister temp1, temp2; \
3576 uzp1(vform, temp1, src1, src2); \
3577 uzp2(vform, temp2, src1, src2); \
3578 FN(vform, dst, temp1, temp2); \
3579 return dst; \
3580 } \
3581 \
3582 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3583 const LogicVRegister& src) { \
3584 if (vform == kFormatS) { \
3585 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
3586 dst.SetFloat(0, result); \
3587 } else { \
3588 DCHECK_EQ(vform, kFormatD); \
3589 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
3590 dst.SetFloat(0, result); \
3591 } \
3592 dst.ClearForWrite(vform); \
3593 return dst; \
3594 }
3595 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3596 #undef DEFINE_NEON_FP_PAIR_OP
3597
3598 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3599 const LogicVRegister& src, FPMinMaxOp Op) {
3600 DCHECK_EQ(vform, kFormat4S);
3601 USE(vform);
3602 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3603 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3604 float result = (this->*Op)(result1, result2);
3605 dst.ClearForWrite(kFormatS);
3606 dst.SetFloat<float>(0, result);
3607 return dst;
3608 }
3609
3610 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3611 const LogicVRegister& src) {
3612 return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3613 }
3614
3615 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3616 const LogicVRegister& src) {
3617 return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3618 }
3619
3620 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3621 const LogicVRegister& src) {
3622 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3623 }
3624
3625 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3626 const LogicVRegister& src) {
3627 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3628 }
3629
3630 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3631 const LogicVRegister& src1,
3632 const LogicVRegister& src2, int index) {
3633 dst.ClearForWrite(vform);
3634 SimVRegister temp;
3635 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3636 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3637 fmul<float>(vform, dst, src1, index_reg);
3638 } else {
3639 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3640 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3641 fmul<double>(vform, dst, src1, index_reg);
3642 }
3643 return dst;
3644 }
3645
3646 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3647 const LogicVRegister& src1,
3648 const LogicVRegister& src2, int index) {
3649 dst.ClearForWrite(vform);
3650 SimVRegister temp;
3651 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3652 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3653 fmla<float>(vform, dst, src1, index_reg);
3654 } else {
3655 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3656 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3657 fmla<double>(vform, dst, src1, index_reg);
3658 }
3659 return dst;
3660 }
3661
3662 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3663 const LogicVRegister& src1,
3664 const LogicVRegister& src2, int index) {
3665 dst.ClearForWrite(vform);
3666 SimVRegister temp;
3667 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3668 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3669 fmls<float>(vform, dst, src1, index_reg);
3670 } else {
3671 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3672 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3673 fmls<double>(vform, dst, src1, index_reg);
3674 }
3675 return dst;
3676 }
3677
3678 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3679 const LogicVRegister& src1,
3680 const LogicVRegister& src2, int index) {
3681 dst.ClearForWrite(vform);
3682 SimVRegister temp;
3683 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3684 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3685 fmulx<float>(vform, dst, src1, index_reg);
3686
3687 } else {
3688 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3689 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3690 fmulx<double>(vform, dst, src1, index_reg);
3691 }
3692 return dst;
3693 }
3694
3695 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3696 const LogicVRegister& src,
3697 FPRounding rounding_mode,
3698 bool inexact_exception) {
3699 dst.ClearForWrite(vform);
3700 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3701 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3702 float input = src.Float<float>(i);
3703 float rounded = FPRoundInt(input, rounding_mode);
3704 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3705 FPProcessException();
3706 }
3707 dst.SetFloat<float>(i, rounded);
3708 }
3709 } else {
3710 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3711 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3712 double input = src.Float<double>(i);
3713 double rounded = FPRoundInt(input, rounding_mode);
3714 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3715 FPProcessException();
3716 }
3717 dst.SetFloat<double>(i, rounded);
3718 }
3719 }
3720 return dst;
3721 }
3722
3723 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3724 const LogicVRegister& src,
3725 FPRounding rounding_mode, int fbits) {
3726 dst.ClearForWrite(vform);
3727 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3728 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3729 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3730 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3731 }
3732 } else {
3733 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3734 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3735 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3736 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3737 }
3738 }
3739 return dst;
3740 }
3741
3742 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3743 const LogicVRegister& src,
3744 FPRounding rounding_mode, int fbits) {
3745 dst.ClearForWrite(vform);
3746 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3747 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3748 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3749 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3750 }
3751 } else {
3752 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3753 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3754 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3755 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3756 }
3757 }
3758 return dst;
3759 }
3760
3761 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3762 const LogicVRegister& src) {
3763 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3764 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3765 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3766 }
3767 } else {
3768 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3769 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3770 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3771 }
3772 }
3773 return dst;
3774 }
3775
3776 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3777 const LogicVRegister& src) {
3778 int lane_count = LaneCountFromFormat(vform);
3779 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3780 for (int i = 0; i < lane_count; i++) {
3781 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3782 }
3783 } else {
3784 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3785 for (int i = 0; i < lane_count; i++) {
3786 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3787 }
3788 }
3789 return dst;
3790 }
3791
3792 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3793 const LogicVRegister& src) {
3794 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3795 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3796 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3797 }
3798 } else {
3799 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3800 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3801 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3802 }
3803 }
3804 return dst;
3805 }
3806
3807 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3808 const LogicVRegister& src) {
3809 int lane_count = LaneCountFromFormat(vform) / 2;
3810 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3811 for (int i = lane_count - 1; i >= 0; i--) {
3812 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3813 }
3814 } else {
3815 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3816 for (int i = lane_count - 1; i >= 0; i--) {
3817 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3818 }
3819 }
3820 return dst;
3821 }
3822
3823 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3824 const LogicVRegister& src) {
3825 dst.ClearForWrite(vform);
3826 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3827 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3828 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3829 }
3830 return dst;
3831 }
3832
3833 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3834 const LogicVRegister& src) {
3835 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3836 int lane_count = LaneCountFromFormat(vform) / 2;
3837 for (int i = lane_count - 1; i >= 0; i--) {
3838 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3839 }
3840 return dst;
3841 }
3842
3843 // Based on reference C function recip_sqrt_estimate from ARM ARM.
3844 double Simulator::recip_sqrt_estimate(double a) {
3845 int q0, q1, s;
3846 double r;
3847 if (a < 0.5) {
3848 q0 = static_cast<int>(a * 512.0);
3849 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3850 } else {
3851 q1 = static_cast<int>(a * 256.0);
3852 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3853 }
3854 s = static_cast<int>(256.0 * r + 0.5);
3855 return static_cast<double>(s) / 256.0;
3856 }
3857
3858 namespace {
3859
3860 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3861 return unsigned_bitextract_64(start_bit, end_bit, val);
3862 }
3863
3864 } // anonymous namespace
3865
3866 template <typename T>
3867 T Simulator::FPRecipSqrtEstimate(T op) {
3868 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3869 "T must be a float or double");
3870
3871 if (std::isnan(op)) {
3872 return FPProcessNaN(op);
3873 } else if (op == 0.0) {
3874 if (copysign(1.0, op) < 0.0) {
3875 return kFP64NegativeInfinity;
3876 } else {
3877 return kFP64PositiveInfinity;
3878 }
3879 } else if (copysign(1.0, op) < 0.0) {
3880 FPProcessException();
3881 return FPDefaultNaN<T>();
3882 } else if (std::isinf(op)) {
3883 return 0.0;
3884 } else {
3885 uint64_t fraction;
3886 int32_t exp, result_exp;
3887
3888 if (sizeof(T) == sizeof(float)) {
3889 exp = static_cast<int32_t>(float_exp(op));
3890 fraction = float_mantissa(op);
3891 fraction <<= 29;
3892 } else {
3893 exp = static_cast<int32_t>(double_exp(op));
3894 fraction = double_mantissa(op);
3895 }
3896
3897 if (exp == 0) {
3898 while (Bits(fraction, 51, 51) == 0) {
3899 fraction = Bits(fraction, 50, 0) << 1;
3900 exp -= 1;
3901 }
3902 fraction = Bits(fraction, 50, 0) << 1;
3903 }
3904
3905 double scaled;
3906 if (Bits(exp, 0, 0) == 0) {
3907 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3908 } else {
3909 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3910 }
3911
3912 if (sizeof(T) == sizeof(float)) {
3913 result_exp = (380 - exp) / 2;
3914 } else {
3915 result_exp = (3068 - exp) / 2;
3916 }
3917
3918 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3919
3920 if (sizeof(T) == sizeof(float)) {
3921 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3922 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3923 return float_pack(0, exp_bits, est_bits);
3924 } else {
3925 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3926 }
3927 }
3928 }
3929
3930 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3931 const LogicVRegister& src) {
3932 dst.ClearForWrite(vform);
3933 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3934 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3935 float input = src.Float<float>(i);
3936 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3937 }
3938 } else {
3939 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3940 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3941 double input = src.Float<double>(i);
3942 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3943 }
3944 }
3945 return dst;
3946 }
3947
3948 template <typename T>
3949 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3950 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3951 "T must be a float or double");
3952 uint32_t sign;
3953
3954 if (sizeof(T) == sizeof(float)) {
3955 sign = float_sign(op);
3956 } else {
3957 sign = double_sign(op);
3958 }
3959
3960 if (std::isnan(op)) {
3961 return FPProcessNaN(op);
3962 } else if (std::isinf(op)) {
3963 return (sign == 1) ? -0.0 : 0.0;
3964 } else if (op == 0.0) {
3965 FPProcessException(); // FPExc_DivideByZero exception.
3966 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3967 } else if (((sizeof(T) == sizeof(float)) &&
3968 (std::fabs(op) < std::pow(2.0, -128.0))) ||
3969 ((sizeof(T) == sizeof(double)) &&
3970 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
3971 bool overflow_to_inf = false;
3972 switch (rounding) {
3973 case FPTieEven:
3974 overflow_to_inf = true;
3975 break;
3976 case FPPositiveInfinity:
3977 overflow_to_inf = (sign == 0);
3978 break;
3979 case FPNegativeInfinity:
3980 overflow_to_inf = (sign == 1);
3981 break;
3982 case FPZero:
3983 overflow_to_inf = false;
3984 break;
3985 default:
3986 break;
3987 }
3988 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
3989 if (overflow_to_inf) {
3990 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3991 } else {
3992 // Return FPMaxNormal(sign).
3993 if (sizeof(T) == sizeof(float)) {
3994 return float_pack(sign, 0xfe, 0x07fffff);
3995 } else {
3996 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
3997 }
3998 }
3999 } else {
4000 uint64_t fraction;
4001 int32_t exp, result_exp;
4002 uint32_t sign;
4003
4004 if (sizeof(T) == sizeof(float)) {
4005 sign = float_sign(op);
4006 exp = static_cast<int32_t>(float_exp(op));
4007 fraction = float_mantissa(op);
4008 fraction <<= 29;
4009 } else {
4010 sign = double_sign(op);
4011 exp = static_cast<int32_t>(double_exp(op));
4012 fraction = double_mantissa(op);
4013 }
4014
4015 if (exp == 0) {
4016 if (Bits(fraction, 51, 51) == 0) {
4017 exp -= 1;
4018 fraction = Bits(fraction, 49, 0) << 2;
4019 } else {
4020 fraction = Bits(fraction, 50, 0) << 1;
4021 }
4022 }
4023
4024 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4025
4026 if (sizeof(T) == sizeof(float)) {
4027 result_exp = 253 - exp;
4028 } else {
4029 result_exp = 2045 - exp;
4030 }
4031
4032 double estimate = recip_estimate(scaled);
4033
4034 fraction = double_mantissa(estimate);
4035 if (result_exp == 0) {
4036 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4037 } else if (result_exp == -1) {
4038 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4039 result_exp = 0;
4040 }
4041 if (sizeof(T) == sizeof(float)) {
4042 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4043 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4044 return float_pack(sign, exp_bits, frac_bits);
4045 } else {
4046 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4047 }
4048 }
4049 }
4050
4051 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4052 const LogicVRegister& src, FPRounding round) {
4053 dst.ClearForWrite(vform);
4054 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4055 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4056 float input = src.Float<float>(i);
4057 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4058 }
4059 } else {
4060 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4061 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4062 double input = src.Float<double>(i);
4063 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4064 }
4065 }
4066 return dst;
4067 }
4068
4069 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4070 const LogicVRegister& src) {
4071 dst.ClearForWrite(vform);
4072 uint64_t operand;
4073 uint32_t result;
4074 double dp_operand, dp_result;
4075 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4076 operand = src.Uint(vform, i);
4077 if (operand <= 0x3FFFFFFF) {
4078 result = 0xFFFFFFFF;
4079 } else {
4080 dp_operand = operand * std::pow(2.0, -32);
4081 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4082 result = static_cast<uint32_t>(dp_result);
4083 }
4084 dst.SetUint(vform, i, result);
4085 }
4086 return dst;
4087 }
4088
4089 // Based on reference C function recip_estimate from ARM ARM.
4090 double Simulator::recip_estimate(double a) {
4091 int q, s;
4092 double r;
4093 q = static_cast<int>(a * 512.0);
4094 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4095 s = static_cast<int>(256.0 * r + 0.5);
4096 return static_cast<double>(s) / 256.0;
4097 }
4098
4099 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4100 const LogicVRegister& src) {
4101 dst.ClearForWrite(vform);
4102 uint64_t operand;
4103 uint32_t result;
4104 double dp_operand, dp_result;
4105 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4106 operand = src.Uint(vform, i);
4107 if (operand <= 0x7FFFFFFF) {
4108 result = 0xFFFFFFFF;
4109 } else {
4110 dp_operand = operand * std::pow(2.0, -32);
4111 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4112 result = static_cast<uint32_t>(dp_result);
4113 }
4114 dst.SetUint(vform, i, result);
4115 }
4116 return dst;
4117 }
4118
4119 template <typename T>
4120 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4121 const LogicVRegister& src) {
4122 dst.ClearForWrite(vform);
4123 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4124 T op = src.Float<T>(i);
4125 T result;
4126 if (std::isnan(op)) {
4127 result = FPProcessNaN(op);
4128 } else {
4129 int exp;
4130 uint32_t sign;
4131 if (sizeof(T) == sizeof(float)) {
4132 sign = float_sign(op);
4133 exp = static_cast<int>(float_exp(op));
4134 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4135 result = float_pack(sign, exp, 0);
4136 } else {
4137 sign = double_sign(op);
4138 exp = static_cast<int>(double_exp(op));
4139 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4140 result = double_pack(sign, exp, 0);
4141 }
4142 }
4143 dst.SetFloat(i, result);
4144 }
4145 return dst;
4146 }
4147
4148 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4149 const LogicVRegister& src) {
4150 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4151 frecpx<float>(vform, dst, src);
4152 } else {
4153 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4154 frecpx<double>(vform, dst, src);
4155 }
4156 return dst;
4157 }
4158
4159 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4160 const LogicVRegister& src, int fbits,
4161 FPRounding round) {
4162 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4163 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4164 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4165 dst.SetFloat<float>(i, result);
4166 } else {
4167 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4168 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4169 dst.SetFloat<double>(i, result);
4170 }
4171 }
4172 return dst;
4173 }
4174
4175 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4176 const LogicVRegister& src, int fbits,
4177 FPRounding round) {
4178 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4179 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4180 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4181 dst.SetFloat<float>(i, result);
4182 } else {
4183 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4184 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4185 dst.SetFloat<double>(i, result);
4186 }
4187 }
4188 return dst;
4189 }
4190
4191 #endif // USE_SIMULATOR
4192
4193 } // namespace internal
4194 } // namespace v8
4195
4196 #endif // V8_TARGET_ARCH_ARM64
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698