Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Side by Side Diff: src/arm64/simulator-logic-arm64.cc

Issue 2896303003: Reland of Reland of "ARM64: Add NEON support" (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm64/simulator-arm64.cc ('k') | src/arm64/utils-arm64.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include <cmath>
8 #include "src/arm64/simulator-arm64.h"
9
10 namespace v8 {
11 namespace internal {
12
13 #if defined(USE_SIMULATOR)
14
15 namespace {
16
17 // See FPRound for a description of this function.
18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
19 FPRounding round_mode) {
20 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
21 sign, exponent, mantissa, round_mode);
22 return bit_cast<double>(bits);
23 }
24
25 // See FPRound for a description of this function.
26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
27 FPRounding round_mode) {
28 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
29 sign, exponent, mantissa, round_mode);
30 return bit_cast<float>(bits);
31 }
32
33 // See FPRound for a description of this function.
34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
35 uint64_t mantissa, FPRounding round_mode) {
36 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
37 sign, exponent, mantissa, round_mode);
38 }
39
40 } // namespace
41
42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
43 if (src >= 0) {
44 return UFixedToDouble(src, fbits, round);
45 } else if (src == INT64_MIN) {
46 return -UFixedToDouble(src, fbits, round);
47 } else {
48 return -UFixedToDouble(-src, fbits, round);
49 }
50 }
51
52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
53 // An input of 0 is a special case because the result is effectively
54 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
55 if (src == 0) {
56 return 0.0;
57 }
58
59 // Calculate the exponent. The highest significant bit will have the value
60 // 2^exponent.
61 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
62 const int64_t exponent = highest_significant_bit - fbits;
63
64 return FPRoundToDouble(0, exponent, src, round);
65 }
66
67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
68 if (src >= 0) {
69 return UFixedToFloat(src, fbits, round);
70 } else if (src == INT64_MIN) {
71 return -UFixedToFloat(src, fbits, round);
72 } else {
73 return -UFixedToFloat(-src, fbits, round);
74 }
75 }
76
77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
78 // An input of 0 is a special case because the result is effectively
79 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
80 if (src == 0) {
81 return 0.0f;
82 }
83
84 // Calculate the exponent. The highest significant bit will have the value
85 // 2^exponent.
86 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
87 const int32_t exponent = highest_significant_bit - fbits;
88
89 return FPRoundToFloat(0, exponent, src, round);
90 }
91
92 double Simulator::FPToDouble(float value) {
93 switch (std::fpclassify(value)) {
94 case FP_NAN: {
95 if (IsSignallingNaN(value)) {
96 FPProcessException();
97 }
98 if (DN()) return kFP64DefaultNaN;
99
100 // Convert NaNs as the processor would:
101 // - The sign is propagated.
102 // - The mantissa is transferred entirely, except that the top bit is
103 // forced to '1', making the result a quiet NaN. The unused (low-order)
104 // mantissa bits are set to 0.
105 uint32_t raw = bit_cast<uint32_t>(value);
106
107 uint64_t sign = raw >> 31;
108 uint64_t exponent = (1 << kDoubleExponentBits) - 1;
109 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
110
111 // Unused low-order bits remain zero.
112 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
113
114 // Force a quiet NaN.
115 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
116
117 return double_pack(sign, exponent, mantissa);
118 }
119
120 case FP_ZERO:
121 case FP_NORMAL:
122 case FP_SUBNORMAL:
123 case FP_INFINITE: {
124 // All other inputs are preserved in a standard cast, because every value
125 // representable using an IEEE-754 float is also representable using an
126 // IEEE-754 double.
127 return static_cast<double>(value);
128 }
129 }
130
131 UNREACHABLE();
132 }
133
134 float Simulator::FPToFloat(float16 value) {
135 uint32_t sign = value >> 15;
136 uint32_t exponent =
137 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
138 kFloat16MantissaBits, value);
139 uint32_t mantissa =
140 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
141
142 switch (float16classify(value)) {
143 case FP_ZERO:
144 return (sign == 0) ? 0.0f : -0.0f;
145
146 case FP_INFINITE:
147 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
148
149 case FP_SUBNORMAL: {
150 // Calculate shift required to put mantissa into the most-significant bits
151 // of the destination mantissa.
152 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
153
154 // Shift mantissa and discard implicit '1'.
155 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
156 mantissa &= (1 << kFloatMantissaBits) - 1;
157
158 // Adjust the exponent for the shift applied, and rebias.
159 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
160 break;
161 }
162
163 case FP_NAN: {
164 if (IsSignallingNaN(value)) {
165 FPProcessException();
166 }
167 if (DN()) return kFP32DefaultNaN;
168
169 // Convert NaNs as the processor would:
170 // - The sign is propagated.
171 // - The mantissa is transferred entirely, except that the top bit is
172 // forced to '1', making the result a quiet NaN. The unused (low-order)
173 // mantissa bits are set to 0.
174 exponent = (1 << kFloatExponentBits) - 1;
175
176 // Increase bits in mantissa, making low-order bits 0.
177 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
178 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.
179 break;
180 }
181
182 case FP_NORMAL: {
183 // Increase bits in mantissa, making low-order bits 0.
184 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
185
186 // Change exponent bias.
187 exponent += (kFloatExponentBias - kFloat16ExponentBias);
188 break;
189 }
190
191 default:
192 UNREACHABLE();
193 }
194 return float_pack(sign, exponent, mantissa);
195 }
196
197 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
198 // Only the FPTieEven rounding mode is implemented.
199 DCHECK_EQ(round_mode, FPTieEven);
200 USE(round_mode);
201
202 int64_t sign = float_sign(value);
203 int64_t exponent =
204 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
205 uint32_t mantissa = float_mantissa(value);
206
207 switch (std::fpclassify(value)) {
208 case FP_NAN: {
209 if (IsSignallingNaN(value)) {
210 FPProcessException();
211 }
212 if (DN()) return kFP16DefaultNaN;
213
214 // Convert NaNs as the processor would:
215 // - The sign is propagated.
216 // - The mantissa is transferred as much as possible, except that the top
217 // bit is forced to '1', making the result a quiet NaN.
218 float16 result =
219 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
220 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
221 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
222 return result;
223 }
224
225 case FP_ZERO:
226 return (sign == 0) ? 0 : 0x8000;
227
228 case FP_INFINITE:
229 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
230
231 case FP_NORMAL:
232 case FP_SUBNORMAL: {
233 // Convert float-to-half as the processor would, assuming that FPCR.FZ
234 // (flush-to-zero) is not set.
235
236 // Add the implicit '1' bit to the mantissa.
237 mantissa += (1 << kFloatMantissaBits);
238 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
239 }
240 }
241
242 UNREACHABLE();
243 }
244
245 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
246 // Only the FPTieEven rounding mode is implemented.
247 DCHECK_EQ(round_mode, FPTieEven);
248 USE(round_mode);
249
250 int64_t sign = double_sign(value);
251 int64_t exponent =
252 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
253 uint64_t mantissa = double_mantissa(value);
254
255 switch (std::fpclassify(value)) {
256 case FP_NAN: {
257 if (IsSignallingNaN(value)) {
258 FPProcessException();
259 }
260 if (DN()) return kFP16DefaultNaN;
261
262 // Convert NaNs as the processor would:
263 // - The sign is propagated.
264 // - The mantissa is transferred as much as possible, except that the top
265 // bit is forced to '1', making the result a quiet NaN.
266 float16 result =
267 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
268 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
269 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
270 return result;
271 }
272
273 case FP_ZERO:
274 return (sign == 0) ? 0 : 0x8000;
275
276 case FP_INFINITE:
277 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
278
279 case FP_NORMAL:
280 case FP_SUBNORMAL: {
281 // Convert double-to-half as the processor would, assuming that FPCR.FZ
282 // (flush-to-zero) is not set.
283
284 // Add the implicit '1' bit to the mantissa.
285 mantissa += (UINT64_C(1) << kDoubleMantissaBits);
286 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
287 }
288 }
289
290 UNREACHABLE();
291 }
292
293 float Simulator::FPToFloat(double value, FPRounding round_mode) {
294 // Only the FPTieEven rounding mode is implemented.
295 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
296 USE(round_mode);
297
298 switch (std::fpclassify(value)) {
299 case FP_NAN: {
300 if (IsSignallingNaN(value)) {
301 FPProcessException();
302 }
303 if (DN()) return kFP32DefaultNaN;
304
305 // Convert NaNs as the processor would:
306 // - The sign is propagated.
307 // - The mantissa is transferred as much as possible, except that the
308 // top bit is forced to '1', making the result a quiet NaN.
309
310 uint64_t raw = bit_cast<uint64_t>(value);
311
312 uint32_t sign = raw >> 63;
313 uint32_t exponent = (1 << 8) - 1;
314 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
315 50, kDoubleMantissaBits - kFloatMantissaBits, raw));
316 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.
317
318 return float_pack(sign, exponent, mantissa);
319 }
320
321 case FP_ZERO:
322 case FP_INFINITE: {
323 // In a C++ cast, any value representable in the target type will be
324 // unchanged. This is always the case for +/-0.0 and infinities.
325 return static_cast<float>(value);
326 }
327
328 case FP_NORMAL:
329 case FP_SUBNORMAL: {
330 // Convert double-to-float as the processor would, assuming that FPCR.FZ
331 // (flush-to-zero) is not set.
332 uint32_t sign = double_sign(value);
333 int64_t exponent =
334 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
335 uint64_t mantissa = double_mantissa(value);
336 if (std::fpclassify(value) == FP_NORMAL) {
337 // For normal FP values, add the hidden bit.
338 mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
339 }
340 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
341 }
342 }
343
344 UNREACHABLE();
345 }
346
347 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
348 dst.ClearForWrite(vform);
349 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
350 dst.ReadUintFromMem(vform, i, addr);
351 addr += LaneSizeInBytesFromFormat(vform);
352 }
353 }
354
355 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
356 uint64_t addr) {
357 dst.ReadUintFromMem(vform, index, addr);
358 }
359
360 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
361 dst.ClearForWrite(vform);
362 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
363 dst.ReadUintFromMem(vform, i, addr);
364 }
365 }
366
367 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
368 LogicVRegister dst2, uint64_t addr1) {
369 dst1.ClearForWrite(vform);
370 dst2.ClearForWrite(vform);
371 int esize = LaneSizeInBytesFromFormat(vform);
372 uint64_t addr2 = addr1 + esize;
373 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
374 dst1.ReadUintFromMem(vform, i, addr1);
375 dst2.ReadUintFromMem(vform, i, addr2);
376 addr1 += 2 * esize;
377 addr2 += 2 * esize;
378 }
379 }
380
381 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
382 LogicVRegister dst2, int index, uint64_t addr1) {
383 dst1.ClearForWrite(vform);
384 dst2.ClearForWrite(vform);
385 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
386 dst1.ReadUintFromMem(vform, index, addr1);
387 dst2.ReadUintFromMem(vform, index, addr2);
388 }
389
390 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
391 LogicVRegister dst2, uint64_t addr) {
392 dst1.ClearForWrite(vform);
393 dst2.ClearForWrite(vform);
394 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
395 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
396 dst1.ReadUintFromMem(vform, i, addr);
397 dst2.ReadUintFromMem(vform, i, addr2);
398 }
399 }
400
401 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
402 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
403 dst1.ClearForWrite(vform);
404 dst2.ClearForWrite(vform);
405 dst3.ClearForWrite(vform);
406 int esize = LaneSizeInBytesFromFormat(vform);
407 uint64_t addr2 = addr1 + esize;
408 uint64_t addr3 = addr2 + esize;
409 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
410 dst1.ReadUintFromMem(vform, i, addr1);
411 dst2.ReadUintFromMem(vform, i, addr2);
412 dst3.ReadUintFromMem(vform, i, addr3);
413 addr1 += 3 * esize;
414 addr2 += 3 * esize;
415 addr3 += 3 * esize;
416 }
417 }
418
419 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
420 LogicVRegister dst2, LogicVRegister dst3, int index,
421 uint64_t addr1) {
422 dst1.ClearForWrite(vform);
423 dst2.ClearForWrite(vform);
424 dst3.ClearForWrite(vform);
425 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
426 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
427 dst1.ReadUintFromMem(vform, index, addr1);
428 dst2.ReadUintFromMem(vform, index, addr2);
429 dst3.ReadUintFromMem(vform, index, addr3);
430 }
431
432 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
433 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
434 dst1.ClearForWrite(vform);
435 dst2.ClearForWrite(vform);
436 dst3.ClearForWrite(vform);
437 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
438 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
439 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
440 dst1.ReadUintFromMem(vform, i, addr);
441 dst2.ReadUintFromMem(vform, i, addr2);
442 dst3.ReadUintFromMem(vform, i, addr3);
443 }
444 }
445
446 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
447 LogicVRegister dst2, LogicVRegister dst3,
448 LogicVRegister dst4, uint64_t addr1) {
449 dst1.ClearForWrite(vform);
450 dst2.ClearForWrite(vform);
451 dst3.ClearForWrite(vform);
452 dst4.ClearForWrite(vform);
453 int esize = LaneSizeInBytesFromFormat(vform);
454 uint64_t addr2 = addr1 + esize;
455 uint64_t addr3 = addr2 + esize;
456 uint64_t addr4 = addr3 + esize;
457 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
458 dst1.ReadUintFromMem(vform, i, addr1);
459 dst2.ReadUintFromMem(vform, i, addr2);
460 dst3.ReadUintFromMem(vform, i, addr3);
461 dst4.ReadUintFromMem(vform, i, addr4);
462 addr1 += 4 * esize;
463 addr2 += 4 * esize;
464 addr3 += 4 * esize;
465 addr4 += 4 * esize;
466 }
467 }
468
469 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
470 LogicVRegister dst2, LogicVRegister dst3,
471 LogicVRegister dst4, int index, uint64_t addr1) {
472 dst1.ClearForWrite(vform);
473 dst2.ClearForWrite(vform);
474 dst3.ClearForWrite(vform);
475 dst4.ClearForWrite(vform);
476 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
477 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
478 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
479 dst1.ReadUintFromMem(vform, index, addr1);
480 dst2.ReadUintFromMem(vform, index, addr2);
481 dst3.ReadUintFromMem(vform, index, addr3);
482 dst4.ReadUintFromMem(vform, index, addr4);
483 }
484
485 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
486 LogicVRegister dst2, LogicVRegister dst3,
487 LogicVRegister dst4, uint64_t addr) {
488 dst1.ClearForWrite(vform);
489 dst2.ClearForWrite(vform);
490 dst3.ClearForWrite(vform);
491 dst4.ClearForWrite(vform);
492 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
493 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
494 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
495 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
496 dst1.ReadUintFromMem(vform, i, addr);
497 dst2.ReadUintFromMem(vform, i, addr2);
498 dst3.ReadUintFromMem(vform, i, addr3);
499 dst4.ReadUintFromMem(vform, i, addr4);
500 }
501 }
502
503 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
504 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
505 src.WriteUintToMem(vform, i, addr);
506 addr += LaneSizeInBytesFromFormat(vform);
507 }
508 }
509
510 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
511 uint64_t addr) {
512 src.WriteUintToMem(vform, index, addr);
513 }
514
515 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
516 uint64_t addr) {
517 int esize = LaneSizeInBytesFromFormat(vform);
518 uint64_t addr2 = addr + esize;
519 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
520 dst.WriteUintToMem(vform, i, addr);
521 dst2.WriteUintToMem(vform, i, addr2);
522 addr += 2 * esize;
523 addr2 += 2 * esize;
524 }
525 }
526
527 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
528 int index, uint64_t addr) {
529 int esize = LaneSizeInBytesFromFormat(vform);
530 dst.WriteUintToMem(vform, index, addr);
531 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
532 }
533
534 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
535 LogicVRegister dst3, uint64_t addr) {
536 int esize = LaneSizeInBytesFromFormat(vform);
537 uint64_t addr2 = addr + esize;
538 uint64_t addr3 = addr2 + esize;
539 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
540 dst.WriteUintToMem(vform, i, addr);
541 dst2.WriteUintToMem(vform, i, addr2);
542 dst3.WriteUintToMem(vform, i, addr3);
543 addr += 3 * esize;
544 addr2 += 3 * esize;
545 addr3 += 3 * esize;
546 }
547 }
548
549 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
550 LogicVRegister dst3, int index, uint64_t addr) {
551 int esize = LaneSizeInBytesFromFormat(vform);
552 dst.WriteUintToMem(vform, index, addr);
553 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
554 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
555 }
556
557 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
558 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
559 int esize = LaneSizeInBytesFromFormat(vform);
560 uint64_t addr2 = addr + esize;
561 uint64_t addr3 = addr2 + esize;
562 uint64_t addr4 = addr3 + esize;
563 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
564 dst.WriteUintToMem(vform, i, addr);
565 dst2.WriteUintToMem(vform, i, addr2);
566 dst3.WriteUintToMem(vform, i, addr3);
567 dst4.WriteUintToMem(vform, i, addr4);
568 addr += 4 * esize;
569 addr2 += 4 * esize;
570 addr3 += 4 * esize;
571 addr4 += 4 * esize;
572 }
573 }
574
575 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
576 LogicVRegister dst3, LogicVRegister dst4, int index,
577 uint64_t addr) {
578 int esize = LaneSizeInBytesFromFormat(vform);
579 dst.WriteUintToMem(vform, index, addr);
580 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
581 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
582 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
583 }
584
585 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
586 const LogicVRegister& src1,
587 const LogicVRegister& src2, Condition cond) {
588 dst.ClearForWrite(vform);
589 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
590 int64_t sa = src1.Int(vform, i);
591 int64_t sb = src2.Int(vform, i);
592 uint64_t ua = src1.Uint(vform, i);
593 uint64_t ub = src2.Uint(vform, i);
594 bool result = false;
595 switch (cond) {
596 case eq:
597 result = (ua == ub);
598 break;
599 case ge:
600 result = (sa >= sb);
601 break;
602 case gt:
603 result = (sa > sb);
604 break;
605 case hi:
606 result = (ua > ub);
607 break;
608 case hs:
609 result = (ua >= ub);
610 break;
611 case lt:
612 result = (sa < sb);
613 break;
614 case le:
615 result = (sa <= sb);
616 break;
617 default:
618 UNREACHABLE();
619 }
620 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
621 }
622 return dst;
623 }
624
625 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
626 const LogicVRegister& src1, int imm,
627 Condition cond) {
628 SimVRegister temp;
629 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
630 return cmp(vform, dst, src1, imm_reg, cond);
631 }
632
633 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
634 const LogicVRegister& src1,
635 const LogicVRegister& src2) {
636 dst.ClearForWrite(vform);
637 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
638 uint64_t ua = src1.Uint(vform, i);
639 uint64_t ub = src2.Uint(vform, i);
640 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
641 }
642 return dst;
643 }
644
645 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
646 const LogicVRegister& src1,
647 const LogicVRegister& src2) {
648 int lane_size = LaneSizeInBitsFromFormat(vform);
649 dst.ClearForWrite(vform);
650 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
651 // Test for unsigned saturation.
652 uint64_t ua = src1.UintLeftJustified(vform, i);
653 uint64_t ub = src2.UintLeftJustified(vform, i);
654 uint64_t ur = ua + ub;
655 if (ur < ua) {
656 dst.SetUnsignedSat(i, true);
657 }
658
659 // Test for signed saturation.
660 bool pos_a = (ua >> 63) == 0;
661 bool pos_b = (ub >> 63) == 0;
662 bool pos_r = (ur >> 63) == 0;
663 // If the signs of the operands are the same, but different from the result,
664 // there was an overflow.
665 if ((pos_a == pos_b) && (pos_a != pos_r)) {
666 dst.SetSignedSat(i, pos_a);
667 }
668
669 dst.SetInt(vform, i, ur >> (64 - lane_size));
670 }
671 return dst;
672 }
673
674 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
675 const LogicVRegister& src1,
676 const LogicVRegister& src2) {
677 SimVRegister temp1, temp2;
678 uzp1(vform, temp1, src1, src2);
679 uzp2(vform, temp2, src1, src2);
680 add(vform, dst, temp1, temp2);
681 return dst;
682 }
683
684 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
685 const LogicVRegister& src1,
686 const LogicVRegister& src2) {
687 SimVRegister temp;
688 mul(vform, temp, src1, src2);
689 add(vform, dst, dst, temp);
690 return dst;
691 }
692
693 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
694 const LogicVRegister& src1,
695 const LogicVRegister& src2) {
696 SimVRegister temp;
697 mul(vform, temp, src1, src2);
698 sub(vform, dst, dst, temp);
699 return dst;
700 }
701
702 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
703 const LogicVRegister& src1,
704 const LogicVRegister& src2) {
705 dst.ClearForWrite(vform);
706 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708 }
709 return dst;
710 }
711
712 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
713 const LogicVRegister& src1,
714 const LogicVRegister& src2, int index) {
715 SimVRegister temp;
716 VectorFormat indexform = VectorFormatFillQ(vform);
717 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
718 }
719
720 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
721 const LogicVRegister& src1,
722 const LogicVRegister& src2, int index) {
723 SimVRegister temp;
724 VectorFormat indexform = VectorFormatFillQ(vform);
725 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
726 }
727
728 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
729 const LogicVRegister& src1,
730 const LogicVRegister& src2, int index) {
731 SimVRegister temp;
732 VectorFormat indexform = VectorFormatFillQ(vform);
733 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
734 }
735
736 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
737 const LogicVRegister& src1,
738 const LogicVRegister& src2, int index) {
739 SimVRegister temp;
740 VectorFormat indexform =
741 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
742 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
743 }
744
745 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
746 const LogicVRegister& src1,
747 const LogicVRegister& src2, int index) {
748 SimVRegister temp;
749 VectorFormat indexform =
750 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
751 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
752 }
753
754 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
755 const LogicVRegister& src1,
756 const LogicVRegister& src2, int index) {
757 SimVRegister temp;
758 VectorFormat indexform =
759 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
760 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
761 }
762
763 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
764 const LogicVRegister& src1,
765 const LogicVRegister& src2, int index) {
766 SimVRegister temp;
767 VectorFormat indexform =
768 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
769 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
770 }
771
772 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
773 const LogicVRegister& src1,
774 const LogicVRegister& src2, int index) {
775 SimVRegister temp;
776 VectorFormat indexform =
777 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
778 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
779 }
780
781 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
782 const LogicVRegister& src1,
783 const LogicVRegister& src2, int index) {
784 SimVRegister temp;
785 VectorFormat indexform =
786 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
787 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
788 }
789
790 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
791 const LogicVRegister& src1,
792 const LogicVRegister& src2, int index) {
793 SimVRegister temp;
794 VectorFormat indexform =
795 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
796 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
797 }
798
799 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
800 const LogicVRegister& src1,
801 const LogicVRegister& src2, int index) {
802 SimVRegister temp;
803 VectorFormat indexform =
804 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
805 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
806 }
807
808 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
809 const LogicVRegister& src1,
810 const LogicVRegister& src2, int index) {
811 SimVRegister temp;
812 VectorFormat indexform =
813 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
814 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
815 }
816
817 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
818 const LogicVRegister& src1,
819 const LogicVRegister& src2, int index) {
820 SimVRegister temp;
821 VectorFormat indexform =
822 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
823 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
824 }
825
826 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
827 const LogicVRegister& src1,
828 const LogicVRegister& src2, int index) {
829 SimVRegister temp;
830 VectorFormat indexform =
831 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
832 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
833 }
834
835 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
836 const LogicVRegister& src1,
837 const LogicVRegister& src2, int index) {
838 SimVRegister temp;
839 VectorFormat indexform =
840 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
841 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
842 }
843
844 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
845 const LogicVRegister& src1,
846 const LogicVRegister& src2, int index) {
847 SimVRegister temp;
848 VectorFormat indexform =
849 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
850 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
851 }
852
853 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
854 const LogicVRegister& src1,
855 const LogicVRegister& src2, int index) {
856 SimVRegister temp;
857 VectorFormat indexform =
858 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
859 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
860 }
861
862 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
863 const LogicVRegister& src1,
864 const LogicVRegister& src2, int index) {
865 SimVRegister temp;
866 VectorFormat indexform =
867 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
868 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
869 }
870
871 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
872 const LogicVRegister& src1,
873 const LogicVRegister& src2, int index) {
874 SimVRegister temp;
875 VectorFormat indexform =
876 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
877 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
878 }
879
880 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
881 const LogicVRegister& src1,
882 const LogicVRegister& src2, int index) {
883 SimVRegister temp;
884 VectorFormat indexform =
885 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
886 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
887 }
888
889 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
890 const LogicVRegister& src1,
891 const LogicVRegister& src2, int index) {
892 SimVRegister temp;
893 VectorFormat indexform =
894 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
895 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
896 }
897
898 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
899 const LogicVRegister& src1,
900 const LogicVRegister& src2, int index) {
901 SimVRegister temp;
902 VectorFormat indexform = VectorFormatFillQ(vform);
903 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
904 }
905
906 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
907 const LogicVRegister& src1,
908 const LogicVRegister& src2, int index) {
909 SimVRegister temp;
910 VectorFormat indexform = VectorFormatFillQ(vform);
911 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913
914 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
915 uint16_t result = 0;
916 uint16_t extended_op2 = op2;
917 for (int i = 0; i < 8; ++i) {
918 if ((op1 >> i) & 1) {
919 result = result ^ (extended_op2 << i);
920 }
921 }
922 return result;
923 }
924
925 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
926 const LogicVRegister& src1,
927 const LogicVRegister& src2) {
928 dst.ClearForWrite(vform);
929 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
930 dst.SetUint(vform, i,
931 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
932 }
933 return dst;
934 }
935
936 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
937 const LogicVRegister& src1,
938 const LogicVRegister& src2) {
939 VectorFormat vform_src = VectorFormatHalfWidth(vform);
940 dst.ClearForWrite(vform);
941 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
942 dst.SetUint(
943 vform, i,
944 PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
945 }
946 return dst;
947 }
948
949 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
950 const LogicVRegister& src1,
951 const LogicVRegister& src2) {
952 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
953 dst.ClearForWrite(vform);
954 int lane_count = LaneCountFromFormat(vform);
955 for (int i = 0; i < lane_count; i++) {
956 dst.SetUint(vform, i,
957 PolynomialMult(src1.Uint(vform_src, lane_count + i),
958 src2.Uint(vform_src, lane_count + i)));
959 }
960 return dst;
961 }
962
963 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
964 const LogicVRegister& src1,
965 const LogicVRegister& src2) {
966 int lane_size = LaneSizeInBitsFromFormat(vform);
967 dst.ClearForWrite(vform);
968 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
969 // Test for unsigned saturation.
970 uint64_t ua = src1.UintLeftJustified(vform, i);
971 uint64_t ub = src2.UintLeftJustified(vform, i);
972 uint64_t ur = ua - ub;
973 if (ub > ua) {
974 dst.SetUnsignedSat(i, false);
975 }
976
977 // Test for signed saturation.
978 bool pos_a = (ua >> 63) == 0;
979 bool pos_b = (ub >> 63) == 0;
980 bool pos_r = (ur >> 63) == 0;
981 // If the signs of the operands are different, and the sign of the first
982 // operand doesn't match the result, there was an overflow.
983 if ((pos_a != pos_b) && (pos_a != pos_r)) {
984 dst.SetSignedSat(i, pos_a);
985 }
986
987 dst.SetInt(vform, i, ur >> (64 - lane_size));
988 }
989 return dst;
990 }
991
992 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
993 const LogicVRegister& src1,
994 const LogicVRegister& src2) {
995 dst.ClearForWrite(vform);
996 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
997 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
998 }
999 return dst;
1000 }
1001
1002 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1003 const LogicVRegister& src1,
1004 const LogicVRegister& src2) {
1005 dst.ClearForWrite(vform);
1006 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1007 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1008 }
1009 return dst;
1010 }
1011
1012 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1013 const LogicVRegister& src1,
1014 const LogicVRegister& src2) {
1015 dst.ClearForWrite(vform);
1016 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1017 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1018 }
1019 return dst;
1020 }
1021
1022 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1023 const LogicVRegister& src1,
1024 const LogicVRegister& src2) {
1025 dst.ClearForWrite(vform);
1026 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1027 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1028 }
1029 return dst;
1030 }
1031
1032 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1033 const LogicVRegister& src1,
1034 const LogicVRegister& src2) {
1035 dst.ClearForWrite(vform);
1036 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1037 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1038 }
1039 return dst;
1040 }
1041
1042 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1043 const LogicVRegister& src, uint64_t imm) {
1044 uint64_t result[16];
1045 int laneCount = LaneCountFromFormat(vform);
1046 for (int i = 0; i < laneCount; ++i) {
1047 result[i] = src.Uint(vform, i) & ~imm;
1048 }
1049 dst.SetUintArray(vform, result);
1050 return dst;
1051 }
1052
1053 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1054 const LogicVRegister& src1,
1055 const LogicVRegister& src2) {
1056 dst.ClearForWrite(vform);
1057 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1058 uint64_t operand1 = dst.Uint(vform, i);
1059 uint64_t operand2 = ~src2.Uint(vform, i);
1060 uint64_t operand3 = src1.Uint(vform, i);
1061 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1062 dst.SetUint(vform, i, result);
1063 }
1064 return dst;
1065 }
1066
1067 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1068 const LogicVRegister& src1,
1069 const LogicVRegister& src2) {
1070 dst.ClearForWrite(vform);
1071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1072 uint64_t operand1 = dst.Uint(vform, i);
1073 uint64_t operand2 = src2.Uint(vform, i);
1074 uint64_t operand3 = src1.Uint(vform, i);
1075 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1076 dst.SetUint(vform, i, result);
1077 }
1078 return dst;
1079 }
1080
1081 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1082 const LogicVRegister& src1,
1083 const LogicVRegister& src2) {
1084 dst.ClearForWrite(vform);
1085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1086 uint64_t operand1 = src2.Uint(vform, i);
1087 uint64_t operand2 = dst.Uint(vform, i);
1088 uint64_t operand3 = src1.Uint(vform, i);
1089 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1090 dst.SetUint(vform, i, result);
1091 }
1092 return dst;
1093 }
1094
1095 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1096 const LogicVRegister& src1,
1097 const LogicVRegister& src2, bool max) {
1098 dst.ClearForWrite(vform);
1099 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1100 int64_t src1_val = src1.Int(vform, i);
1101 int64_t src2_val = src2.Int(vform, i);
1102 int64_t dst_val;
1103 if (max) {
1104 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1105 } else {
1106 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1107 }
1108 dst.SetInt(vform, i, dst_val);
1109 }
1110 return dst;
1111 }
1112
1113 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1114 const LogicVRegister& src1,
1115 const LogicVRegister& src2) {
1116 return SMinMax(vform, dst, src1, src2, true);
1117 }
1118
1119 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1120 const LogicVRegister& src1,
1121 const LogicVRegister& src2) {
1122 return SMinMax(vform, dst, src1, src2, false);
1123 }
1124
1125 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1126 const LogicVRegister& src1,
1127 const LogicVRegister& src2, bool max) {
1128 int lanes = LaneCountFromFormat(vform);
1129 int64_t result[kMaxLanesPerVector];
1130 const LogicVRegister* src = &src1;
1131 for (int j = 0; j < 2; j++) {
1132 for (int i = 0; i < lanes; i += 2) {
1133 int64_t first_val = src->Int(vform, i);
1134 int64_t second_val = src->Int(vform, i + 1);
1135 int64_t dst_val;
1136 if (max) {
1137 dst_val = (first_val > second_val) ? first_val : second_val;
1138 } else {
1139 dst_val = (first_val < second_val) ? first_val : second_val;
1140 }
1141 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1142 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1143 }
1144 src = &src2;
1145 }
1146 dst.SetIntArray(vform, result);
1147 return dst;
1148 }
1149
1150 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1151 const LogicVRegister& src1,
1152 const LogicVRegister& src2) {
1153 return SMinMaxP(vform, dst, src1, src2, true);
1154 }
1155
1156 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1157 const LogicVRegister& src1,
1158 const LogicVRegister& src2) {
1159 return SMinMaxP(vform, dst, src1, src2, false);
1160 }
1161
1162 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1163 const LogicVRegister& src) {
1164 DCHECK_EQ(vform, kFormatD);
1165
1166 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1167 dst.ClearForWrite(vform);
1168 dst.SetUint(vform, 0, dst_val);
1169 return dst;
1170 }
1171
1172 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1173 const LogicVRegister& src) {
1174 VectorFormat vform_dst =
1175 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1176
1177 int64_t dst_val = 0;
1178 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1179 dst_val += src.Int(vform, i);
1180 }
1181
1182 dst.ClearForWrite(vform_dst);
1183 dst.SetInt(vform_dst, 0, dst_val);
1184 return dst;
1185 }
1186
1187 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1188 const LogicVRegister& src) {
1189 VectorFormat vform_dst =
1190 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1191
1192 int64_t dst_val = 0;
1193 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1194 dst_val += src.Int(vform, i);
1195 }
1196
1197 dst.ClearForWrite(vform_dst);
1198 dst.SetInt(vform_dst, 0, dst_val);
1199 return dst;
1200 }
1201
1202 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1203 const LogicVRegister& src) {
1204 VectorFormat vform_dst =
1205 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1206
1207 uint64_t dst_val = 0;
1208 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1209 dst_val += src.Uint(vform, i);
1210 }
1211
1212 dst.ClearForWrite(vform_dst);
1213 dst.SetUint(vform_dst, 0, dst_val);
1214 return dst;
1215 }
1216
1217 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1218 const LogicVRegister& src, bool max) {
1219 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1220 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1221 int64_t src_val = src.Int(vform, i);
1222 if (max) {
1223 dst_val = (src_val > dst_val) ? src_val : dst_val;
1224 } else {
1225 dst_val = (src_val < dst_val) ? src_val : dst_val;
1226 }
1227 }
1228 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1229 dst.SetInt(vform, 0, dst_val);
1230 return dst;
1231 }
1232
1233 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1234 const LogicVRegister& src) {
1235 SMinMaxV(vform, dst, src, true);
1236 return dst;
1237 }
1238
1239 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1240 const LogicVRegister& src) {
1241 SMinMaxV(vform, dst, src, false);
1242 return dst;
1243 }
1244
1245 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1246 const LogicVRegister& src1,
1247 const LogicVRegister& src2, bool max) {
1248 dst.ClearForWrite(vform);
1249 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1250 uint64_t src1_val = src1.Uint(vform, i);
1251 uint64_t src2_val = src2.Uint(vform, i);
1252 uint64_t dst_val;
1253 if (max) {
1254 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1255 } else {
1256 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1257 }
1258 dst.SetUint(vform, i, dst_val);
1259 }
1260 return dst;
1261 }
1262
1263 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1264 const LogicVRegister& src1,
1265 const LogicVRegister& src2) {
1266 return UMinMax(vform, dst, src1, src2, true);
1267 }
1268
1269 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1270 const LogicVRegister& src1,
1271 const LogicVRegister& src2) {
1272 return UMinMax(vform, dst, src1, src2, false);
1273 }
1274
1275 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1276 const LogicVRegister& src1,
1277 const LogicVRegister& src2, bool max) {
1278 int lanes = LaneCountFromFormat(vform);
1279 uint64_t result[kMaxLanesPerVector];
1280 const LogicVRegister* src = &src1;
1281 for (int j = 0; j < 2; j++) {
1282 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1283 uint64_t first_val = src->Uint(vform, i);
1284 uint64_t second_val = src->Uint(vform, i + 1);
1285 uint64_t dst_val;
1286 if (max) {
1287 dst_val = (first_val > second_val) ? first_val : second_val;
1288 } else {
1289 dst_val = (first_val < second_val) ? first_val : second_val;
1290 }
1291 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1292 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1293 }
1294 src = &src2;
1295 }
1296 dst.SetUintArray(vform, result);
1297 return dst;
1298 }
1299
1300 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1301 const LogicVRegister& src1,
1302 const LogicVRegister& src2) {
1303 return UMinMaxP(vform, dst, src1, src2, true);
1304 }
1305
1306 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1307 const LogicVRegister& src1,
1308 const LogicVRegister& src2) {
1309 return UMinMaxP(vform, dst, src1, src2, false);
1310 }
1311
1312 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1313 const LogicVRegister& src, bool max) {
1314 uint64_t dst_val = max ? 0 : UINT64_MAX;
1315 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1316 uint64_t src_val = src.Uint(vform, i);
1317 if (max) {
1318 dst_val = (src_val > dst_val) ? src_val : dst_val;
1319 } else {
1320 dst_val = (src_val < dst_val) ? src_val : dst_val;
1321 }
1322 }
1323 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1324 dst.SetUint(vform, 0, dst_val);
1325 return dst;
1326 }
1327
1328 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1329 const LogicVRegister& src) {
1330 UMinMaxV(vform, dst, src, true);
1331 return dst;
1332 }
1333
1334 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1335 const LogicVRegister& src) {
1336 UMinMaxV(vform, dst, src, false);
1337 return dst;
1338 }
1339
1340 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1341 const LogicVRegister& src, int shift) {
1342 DCHECK_GE(shift, 0);
1343 SimVRegister temp;
1344 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1345 return ushl(vform, dst, src, shiftreg);
1346 }
1347
1348 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1349 const LogicVRegister& src, int shift) {
1350 DCHECK_GE(shift, 0);
1351 SimVRegister temp1, temp2;
1352 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1353 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1354 return sshl(vform, dst, extendedreg, shiftreg);
1355 }
1356
1357 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1358 const LogicVRegister& src, int shift) {
1359 DCHECK_GE(shift, 0);
1360 SimVRegister temp1, temp2;
1361 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1362 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1363 return sshl(vform, dst, extendedreg, shiftreg);
1364 }
1365
1366 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1367 const LogicVRegister& src) {
1368 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1369 return sshll(vform, dst, src, shift);
1370 }
1371
1372 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1373 const LogicVRegister& src) {
1374 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1375 return sshll2(vform, dst, src, shift);
1376 }
1377
1378 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1379 const LogicVRegister& src, int shift) {
1380 DCHECK_GE(shift, 0);
1381 SimVRegister temp1, temp2;
1382 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1383 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1384 return ushl(vform, dst, extendedreg, shiftreg);
1385 }
1386
1387 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1388 const LogicVRegister& src, int shift) {
1389 DCHECK_GE(shift, 0);
1390 SimVRegister temp1, temp2;
1391 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1392 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1393 return ushl(vform, dst, extendedreg, shiftreg);
1394 }
1395
1396 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1397 const LogicVRegister& src, int shift) {
1398 dst.ClearForWrite(vform);
1399 int laneCount = LaneCountFromFormat(vform);
1400 for (int i = 0; i < laneCount; i++) {
1401 uint64_t src_lane = src.Uint(vform, i);
1402 uint64_t dst_lane = dst.Uint(vform, i);
1403 uint64_t shifted = src_lane << shift;
1404 uint64_t mask = MaxUintFromFormat(vform) << shift;
1405 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1406 }
1407 return dst;
1408 }
1409
1410 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1411 const LogicVRegister& src, int shift) {
1412 DCHECK_GE(shift, 0);
1413 SimVRegister temp;
1414 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1415 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1416 }
1417
1418 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1419 const LogicVRegister& src, int shift) {
1420 DCHECK_GE(shift, 0);
1421 SimVRegister temp;
1422 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1423 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1424 }
1425
1426 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1427 const LogicVRegister& src, int shift) {
1428 DCHECK_GE(shift, 0);
1429 SimVRegister temp;
1430 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1431 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1432 }
1433
1434 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1435 const LogicVRegister& src, int shift) {
1436 dst.ClearForWrite(vform);
1437 int laneCount = LaneCountFromFormat(vform);
1438 DCHECK((shift > 0) &&
1439 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1440 for (int i = 0; i < laneCount; i++) {
1441 uint64_t src_lane = src.Uint(vform, i);
1442 uint64_t dst_lane = dst.Uint(vform, i);
1443 uint64_t shifted;
1444 uint64_t mask;
1445 if (shift == 64) {
1446 shifted = 0;
1447 mask = 0;
1448 } else {
1449 shifted = src_lane >> shift;
1450 mask = MaxUintFromFormat(vform) >> shift;
1451 }
1452 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1453 }
1454 return dst;
1455 }
1456
1457 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1458 const LogicVRegister& src, int shift) {
1459 DCHECK_GE(shift, 0);
1460 SimVRegister temp;
1461 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1462 return ushl(vform, dst, src, shiftreg);
1463 }
1464
1465 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1466 const LogicVRegister& src, int shift) {
1467 DCHECK_GE(shift, 0);
1468 SimVRegister temp;
1469 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1470 return sshl(vform, dst, src, shiftreg);
1471 }
1472
1473 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1474 const LogicVRegister& src, int shift) {
1475 SimVRegister temp;
1476 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1477 return add(vform, dst, dst, shifted_reg);
1478 }
1479
1480 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1481 const LogicVRegister& src, int shift) {
1482 SimVRegister temp;
1483 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1484 return add(vform, dst, dst, shifted_reg);
1485 }
1486
1487 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1488 const LogicVRegister& src, int shift) {
1489 SimVRegister temp;
1490 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1491 return add(vform, dst, dst, shifted_reg);
1492 }
1493
1494 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1495 const LogicVRegister& src, int shift) {
1496 SimVRegister temp;
1497 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1498 return add(vform, dst, dst, shifted_reg);
1499 }
1500
1501 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1502 const LogicVRegister& src) {
1503 uint64_t result[16];
1504 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1505 int laneCount = LaneCountFromFormat(vform);
1506 for (int i = 0; i < laneCount; i++) {
1507 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1508 }
1509
1510 dst.SetUintArray(vform, result);
1511 return dst;
1512 }
1513
1514 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1515 const LogicVRegister& src) {
1516 uint64_t result[16];
1517 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1518 int laneCount = LaneCountFromFormat(vform);
1519 for (int i = 0; i < laneCount; i++) {
1520 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1521 }
1522
1523 dst.SetUintArray(vform, result);
1524 return dst;
1525 }
1526
1527 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1528 const LogicVRegister& src) {
1529 uint64_t result[16];
1530 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1531 int laneCount = LaneCountFromFormat(vform);
1532 for (int i = 0; i < laneCount; i++) {
1533 uint64_t value = src.Uint(vform, i);
1534 result[i] = 0;
1535 for (int j = 0; j < laneSizeInBits; j++) {
1536 result[i] += (value & 1);
1537 value >>= 1;
1538 }
1539 }
1540
1541 dst.SetUintArray(vform, result);
1542 return dst;
1543 }
1544
1545 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1546 const LogicVRegister& src1,
1547 const LogicVRegister& src2) {
1548 dst.ClearForWrite(vform);
1549 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1550 int8_t shift_val = src2.Int(vform, i);
1551 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1552
1553 // Set signed saturation state.
1554 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1555 (lj_src_val != 0)) {
1556 dst.SetSignedSat(i, lj_src_val >= 0);
1557 }
1558
1559 // Set unsigned saturation state.
1560 if (lj_src_val < 0) {
1561 dst.SetUnsignedSat(i, false);
1562 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1563 (lj_src_val != 0)) {
1564 dst.SetUnsignedSat(i, true);
1565 }
1566
1567 int64_t src_val = src1.Int(vform, i);
1568 bool src_is_negative = src_val < 0;
1569 if (shift_val > 63) {
1570 dst.SetInt(vform, i, 0);
1571 } else if (shift_val < -63) {
1572 dst.SetRounding(i, src_is_negative);
1573 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1574 } else {
1575 // Use unsigned types for shifts, as behaviour is undefined for signed
1576 // lhs.
1577 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1578
1579 if (shift_val < 0) {
1580 // Convert to right shift.
1581 shift_val = -shift_val;
1582
1583 // Set rounding state by testing most-significant bit shifted out.
1584 // Rounding only needed on right shifts.
1585 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1586 dst.SetRounding(i, true);
1587 }
1588
1589 usrc_val >>= shift_val;
1590
1591 if (src_is_negative) {
1592 // Simulate sign-extension.
1593 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1594 }
1595 } else {
1596 usrc_val <<= shift_val;
1597 }
1598 dst.SetUint(vform, i, usrc_val);
1599 }
1600 }
1601 return dst;
1602 }
1603
1604 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1605 const LogicVRegister& src1,
1606 const LogicVRegister& src2) {
1607 dst.ClearForWrite(vform);
1608 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1609 int8_t shift_val = src2.Int(vform, i);
1610 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1611
1612 // Set saturation state.
1613 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1614 dst.SetUnsignedSat(i, true);
1615 }
1616
1617 uint64_t src_val = src1.Uint(vform, i);
1618 if ((shift_val > 63) || (shift_val < -64)) {
1619 dst.SetUint(vform, i, 0);
1620 } else {
1621 if (shift_val < 0) {
1622 // Set rounding state. Rounding only needed on right shifts.
1623 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1624 dst.SetRounding(i, true);
1625 }
1626
1627 if (shift_val == -64) {
1628 src_val = 0;
1629 } else {
1630 src_val >>= -shift_val;
1631 }
1632 } else {
1633 src_val <<= shift_val;
1634 }
1635 dst.SetUint(vform, i, src_val);
1636 }
1637 }
1638 return dst;
1639 }
1640
1641 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1642 const LogicVRegister& src) {
1643 dst.ClearForWrite(vform);
1644 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1645 // Test for signed saturation.
1646 int64_t sa = src.Int(vform, i);
1647 if (sa == MinIntFromFormat(vform)) {
1648 dst.SetSignedSat(i, true);
1649 }
1650 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1651 }
1652 return dst;
1653 }
1654
1655 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1656 const LogicVRegister& src) {
1657 dst.ClearForWrite(vform);
1658 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1659 int64_t sa = dst.IntLeftJustified(vform, i);
1660 uint64_t ub = src.UintLeftJustified(vform, i);
1661 uint64_t ur = sa + ub;
1662
1663 int64_t sr = bit_cast<int64_t>(ur);
1664 if (sr < sa) { // Test for signed positive saturation.
1665 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1666 } else {
1667 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1668 }
1669 }
1670 return dst;
1671 }
1672
1673 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1674 const LogicVRegister& src) {
1675 dst.ClearForWrite(vform);
1676 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1677 uint64_t ua = dst.UintLeftJustified(vform, i);
1678 int64_t sb = src.IntLeftJustified(vform, i);
1679 uint64_t ur = ua + sb;
1680
1681 if ((sb > 0) && (ur <= ua)) {
1682 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1683 } else if ((sb < 0) && (ur >= ua)) {
1684 dst.SetUint(vform, i, 0); // Negative saturation.
1685 } else {
1686 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1687 }
1688 }
1689 return dst;
1690 }
1691
1692 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1693 const LogicVRegister& src) {
1694 dst.ClearForWrite(vform);
1695 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1696 // Test for signed saturation.
1697 int64_t sa = src.Int(vform, i);
1698 if (sa == MinIntFromFormat(vform)) {
1699 dst.SetSignedSat(i, true);
1700 }
1701 if (sa < 0) {
1702 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1703 } else {
1704 dst.SetInt(vform, i, sa);
1705 }
1706 }
1707 return dst;
1708 }
1709
1710 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1711 LogicVRegister dst, bool dstIsSigned,
1712 const LogicVRegister& src,
1713 bool srcIsSigned) {
1714 bool upperhalf = false;
1715 VectorFormat srcform = kFormatUndefined;
1716 int64_t ssrc[8];
1717 uint64_t usrc[8];
1718
1719 switch (dstform) {
1720 case kFormat8B:
1721 upperhalf = false;
1722 srcform = kFormat8H;
1723 break;
1724 case kFormat16B:
1725 upperhalf = true;
1726 srcform = kFormat8H;
1727 break;
1728 case kFormat4H:
1729 upperhalf = false;
1730 srcform = kFormat4S;
1731 break;
1732 case kFormat8H:
1733 upperhalf = true;
1734 srcform = kFormat4S;
1735 break;
1736 case kFormat2S:
1737 upperhalf = false;
1738 srcform = kFormat2D;
1739 break;
1740 case kFormat4S:
1741 upperhalf = true;
1742 srcform = kFormat2D;
1743 break;
1744 case kFormatB:
1745 upperhalf = false;
1746 srcform = kFormatH;
1747 break;
1748 case kFormatH:
1749 upperhalf = false;
1750 srcform = kFormatS;
1751 break;
1752 case kFormatS:
1753 upperhalf = false;
1754 srcform = kFormatD;
1755 break;
1756 default:
1757 UNIMPLEMENTED();
1758 }
1759
1760 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1761 ssrc[i] = src.Int(srcform, i);
1762 usrc[i] = src.Uint(srcform, i);
1763 }
1764
1765 int offset;
1766 if (upperhalf) {
1767 offset = LaneCountFromFormat(dstform) / 2;
1768 } else {
1769 offset = 0;
1770 dst.ClearForWrite(dstform);
1771 }
1772
1773 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1774 // Test for signed saturation
1775 if (ssrc[i] > MaxIntFromFormat(dstform)) {
1776 dst.SetSignedSat(offset + i, true);
1777 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1778 dst.SetSignedSat(offset + i, false);
1779 }
1780
1781 // Test for unsigned saturation
1782 if (srcIsSigned) {
1783 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1784 dst.SetUnsignedSat(offset + i, true);
1785 } else if (ssrc[i] < 0) {
1786 dst.SetUnsignedSat(offset + i, false);
1787 }
1788 } else {
1789 if (usrc[i] > MaxUintFromFormat(dstform)) {
1790 dst.SetUnsignedSat(offset + i, true);
1791 }
1792 }
1793
1794 int64_t result;
1795 if (srcIsSigned) {
1796 result = ssrc[i] & MaxUintFromFormat(dstform);
1797 } else {
1798 result = usrc[i] & MaxUintFromFormat(dstform);
1799 }
1800
1801 if (dstIsSigned) {
1802 dst.SetInt(dstform, offset + i, result);
1803 } else {
1804 dst.SetUint(dstform, offset + i, result);
1805 }
1806 }
1807 return dst;
1808 }
1809
1810 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1811 const LogicVRegister& src) {
1812 return ExtractNarrow(vform, dst, true, src, true);
1813 }
1814
1815 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1816 const LogicVRegister& src) {
1817 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1818 }
1819
1820 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1821 const LogicVRegister& src) {
1822 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1823 }
1824
1825 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1826 const LogicVRegister& src) {
1827 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1828 }
1829
1830 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1831 const LogicVRegister& src1,
1832 const LogicVRegister& src2, bool issigned) {
1833 dst.ClearForWrite(vform);
1834 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835 if (issigned) {
1836 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1837 sr = sr > 0 ? sr : -sr;
1838 dst.SetInt(vform, i, sr);
1839 } else {
1840 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1841 sr = sr > 0 ? sr : -sr;
1842 dst.SetUint(vform, i, sr);
1843 }
1844 }
1845 return dst;
1846 }
1847
1848 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1849 const LogicVRegister& src1,
1850 const LogicVRegister& src2) {
1851 SimVRegister temp;
1852 dst.ClearForWrite(vform);
1853 AbsDiff(vform, temp, src1, src2, true);
1854 add(vform, dst, dst, temp);
1855 return dst;
1856 }
1857
1858 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1859 const LogicVRegister& src1,
1860 const LogicVRegister& src2) {
1861 SimVRegister temp;
1862 dst.ClearForWrite(vform);
1863 AbsDiff(vform, temp, src1, src2, false);
1864 add(vform, dst, dst, temp);
1865 return dst;
1866 }
1867
1868 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1869 const LogicVRegister& src) {
1870 dst.ClearForWrite(vform);
1871 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1872 dst.SetUint(vform, i, ~src.Uint(vform, i));
1873 }
1874 return dst;
1875 }
1876
1877 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1878 const LogicVRegister& src) {
1879 uint64_t result[16];
1880 int laneCount = LaneCountFromFormat(vform);
1881 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1882 uint64_t reversed_value;
1883 uint64_t value;
1884 for (int i = 0; i < laneCount; i++) {
1885 value = src.Uint(vform, i);
1886 reversed_value = 0;
1887 for (int j = 0; j < laneSizeInBits; j++) {
1888 reversed_value = (reversed_value << 1) | (value & 1);
1889 value >>= 1;
1890 }
1891 result[i] = reversed_value;
1892 }
1893
1894 dst.SetUintArray(vform, result);
1895 return dst;
1896 }
1897
1898 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1899 const LogicVRegister& src, int revSize) {
1900 uint64_t result[16];
1901 int laneCount = LaneCountFromFormat(vform);
1902 int laneSize = LaneSizeInBytesFromFormat(vform);
1903 int lanesPerLoop = revSize / laneSize;
1904 for (int i = 0; i < laneCount; i += lanesPerLoop) {
1905 for (int j = 0; j < lanesPerLoop; j++) {
1906 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1907 }
1908 }
1909 dst.SetUintArray(vform, result);
1910 return dst;
1911 }
1912
1913 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1914 const LogicVRegister& src) {
1915 return rev(vform, dst, src, 2);
1916 }
1917
1918 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1919 const LogicVRegister& src) {
1920 return rev(vform, dst, src, 4);
1921 }
1922
1923 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1924 const LogicVRegister& src) {
1925 return rev(vform, dst, src, 8);
1926 }
1927
1928 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1929 const LogicVRegister& src, bool is_signed,
1930 bool do_accumulate) {
1931 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1932 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1933 DCHECK_LE(LaneCountFromFormat(vform), 8);
1934
1935 uint64_t result[8];
1936 int lane_count = LaneCountFromFormat(vform);
1937 for (int i = 0; i < lane_count; i++) {
1938 if (is_signed) {
1939 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1940 src.Int(vformsrc, 2 * i + 1));
1941 } else {
1942 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1943 }
1944 }
1945
1946 dst.ClearForWrite(vform);
1947 for (int i = 0; i < lane_count; ++i) {
1948 if (do_accumulate) {
1949 result[i] += dst.Uint(vform, i);
1950 }
1951 dst.SetUint(vform, i, result[i]);
1952 }
1953
1954 return dst;
1955 }
1956
1957 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1958 const LogicVRegister& src) {
1959 return addlp(vform, dst, src, true, false);
1960 }
1961
1962 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1963 const LogicVRegister& src) {
1964 return addlp(vform, dst, src, false, false);
1965 }
1966
1967 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1968 const LogicVRegister& src) {
1969 return addlp(vform, dst, src, true, true);
1970 }
1971
1972 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1973 const LogicVRegister& src) {
1974 return addlp(vform, dst, src, false, true);
1975 }
1976
1977 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1978 const LogicVRegister& src1,
1979 const LogicVRegister& src2, int index) {
1980 uint8_t result[16];
1981 int laneCount = LaneCountFromFormat(vform);
1982 for (int i = 0; i < laneCount - index; ++i) {
1983 result[i] = src1.Uint(vform, i + index);
1984 }
1985 for (int i = 0; i < index; ++i) {
1986 result[laneCount - index + i] = src2.Uint(vform, i);
1987 }
1988 dst.ClearForWrite(vform);
1989 for (int i = 0; i < laneCount; ++i) {
1990 dst.SetUint(vform, i, result[i]);
1991 }
1992 return dst;
1993 }
1994
1995 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
1996 const LogicVRegister& src,
1997 int src_index) {
1998 int laneCount = LaneCountFromFormat(vform);
1999 uint64_t value = src.Uint(vform, src_index);
2000 dst.ClearForWrite(vform);
2001 for (int i = 0; i < laneCount; ++i) {
2002 dst.SetUint(vform, i, value);
2003 }
2004 return dst;
2005 }
2006
2007 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2008 uint64_t imm) {
2009 int laneCount = LaneCountFromFormat(vform);
2010 uint64_t value = imm & MaxUintFromFormat(vform);
2011 dst.ClearForWrite(vform);
2012 for (int i = 0; i < laneCount; ++i) {
2013 dst.SetUint(vform, i, value);
2014 }
2015 return dst;
2016 }
2017
2018 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2019 int dst_index, const LogicVRegister& src,
2020 int src_index) {
2021 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2022 return dst;
2023 }
2024
2025 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2026 int dst_index, uint64_t imm) {
2027 uint64_t value = imm & MaxUintFromFormat(vform);
2028 dst.SetUint(vform, dst_index, value);
2029 return dst;
2030 }
2031
2032 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2033 uint64_t imm) {
2034 int laneCount = LaneCountFromFormat(vform);
2035 dst.ClearForWrite(vform);
2036 for (int i = 0; i < laneCount; ++i) {
2037 dst.SetUint(vform, i, imm);
2038 }
2039 return dst;
2040 }
2041
2042 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2043 uint64_t imm) {
2044 int laneCount = LaneCountFromFormat(vform);
2045 dst.ClearForWrite(vform);
2046 for (int i = 0; i < laneCount; ++i) {
2047 dst.SetUint(vform, i, ~imm);
2048 }
2049 return dst;
2050 }
2051
2052 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2053 const LogicVRegister& src, uint64_t imm) {
2054 uint64_t result[16];
2055 int laneCount = LaneCountFromFormat(vform);
2056 for (int i = 0; i < laneCount; ++i) {
2057 result[i] = src.Uint(vform, i) | imm;
2058 }
2059 dst.SetUintArray(vform, result);
2060 return dst;
2061 }
2062
2063 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2064 const LogicVRegister& src) {
2065 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2066
2067 dst.ClearForWrite(vform);
2068 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2069 dst.SetUint(vform, i, src.Uint(vform_half, i));
2070 }
2071 return dst;
2072 }
2073
2074 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2075 const LogicVRegister& src) {
2076 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2077
2078 dst.ClearForWrite(vform);
2079 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2080 dst.SetInt(vform, i, src.Int(vform_half, i));
2081 }
2082 return dst;
2083 }
2084
2085 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2086 const LogicVRegister& src) {
2087 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2088 int lane_count = LaneCountFromFormat(vform);
2089
2090 dst.ClearForWrite(vform);
2091 for (int i = 0; i < lane_count; i++) {
2092 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2093 }
2094 return dst;
2095 }
2096
2097 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2098 const LogicVRegister& src) {
2099 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2100 int lane_count = LaneCountFromFormat(vform);
2101
2102 dst.ClearForWrite(vform);
2103 for (int i = 0; i < lane_count; i++) {
2104 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2105 }
2106 return dst;
2107 }
2108
2109 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2110 const LogicVRegister& src, int shift) {
2111 SimVRegister temp;
2112 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2113 VectorFormat vform_dst = vform;
2114 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2115 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2116 }
2117
2118 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2119 const LogicVRegister& src, int shift) {
2120 SimVRegister temp;
2121 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2122 VectorFormat vformdst = vform;
2123 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2124 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2125 }
2126
2127 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2128 const LogicVRegister& src, int shift) {
2129 SimVRegister temp;
2130 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2131 VectorFormat vformdst = vform;
2132 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2133 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2134 }
2135
2136 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2137 const LogicVRegister& src, int shift) {
2138 SimVRegister temp;
2139 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2140 VectorFormat vformdst = vform;
2141 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2142 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2143 }
2144
2145 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2146 const LogicVRegister& ind,
2147 bool zero_out_of_bounds,
2148 const LogicVRegister* tab1,
2149 const LogicVRegister* tab2,
2150 const LogicVRegister* tab3,
2151 const LogicVRegister* tab4) {
2152 DCHECK_NOT_NULL(tab1);
2153 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2154 uint64_t result[kMaxLanesPerVector];
2155 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2156 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2157 }
2158 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2159 uint64_t j = ind.Uint(vform, i);
2160 int tab_idx = static_cast<int>(j >> 4);
2161 int j_idx = static_cast<int>(j & 15);
2162 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2163 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2164 }
2165 }
2166 dst.SetUintArray(vform, result);
2167 return dst;
2168 }
2169
2170 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2171 const LogicVRegister& tab,
2172 const LogicVRegister& ind) {
2173 return Table(vform, dst, ind, true, &tab);
2174 }
2175
2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2177 const LogicVRegister& tab,
2178 const LogicVRegister& tab2,
2179 const LogicVRegister& ind) {
2180 return Table(vform, dst, ind, true, &tab, &tab2);
2181 }
2182
2183 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2184 const LogicVRegister& tab,
2185 const LogicVRegister& tab2,
2186 const LogicVRegister& tab3,
2187 const LogicVRegister& ind) {
2188 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2189 }
2190
2191 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2192 const LogicVRegister& tab,
2193 const LogicVRegister& tab2,
2194 const LogicVRegister& tab3,
2195 const LogicVRegister& tab4,
2196 const LogicVRegister& ind) {
2197 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2198 }
2199
2200 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2201 const LogicVRegister& tab,
2202 const LogicVRegister& ind) {
2203 return Table(vform, dst, ind, false, &tab);
2204 }
2205
2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2207 const LogicVRegister& tab,
2208 const LogicVRegister& tab2,
2209 const LogicVRegister& ind) {
2210 return Table(vform, dst, ind, false, &tab, &tab2);
2211 }
2212
2213 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2214 const LogicVRegister& tab,
2215 const LogicVRegister& tab2,
2216 const LogicVRegister& tab3,
2217 const LogicVRegister& ind) {
2218 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2219 }
2220
2221 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2222 const LogicVRegister& tab,
2223 const LogicVRegister& tab2,
2224 const LogicVRegister& tab3,
2225 const LogicVRegister& tab4,
2226 const LogicVRegister& ind) {
2227 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2228 }
2229
2230 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2231 const LogicVRegister& src, int shift) {
2232 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2233 }
2234
2235 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2236 const LogicVRegister& src, int shift) {
2237 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2238 }
2239
2240 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2241 const LogicVRegister& src, int shift) {
2242 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2243 }
2244
2245 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2246 const LogicVRegister& src, int shift) {
2247 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2248 }
2249
2250 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2251 const LogicVRegister& src, int shift) {
2252 SimVRegister temp;
2253 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2254 VectorFormat vformdst = vform;
2255 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2256 return sqxtn(vformdst, dst, shifted_src);
2257 }
2258
2259 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2260 const LogicVRegister& src, int shift) {
2261 SimVRegister temp;
2262 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2263 VectorFormat vformdst = vform;
2264 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2265 return sqxtn(vformdst, dst, shifted_src);
2266 }
2267
2268 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2269 const LogicVRegister& src, int shift) {
2270 SimVRegister temp;
2271 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2272 VectorFormat vformdst = vform;
2273 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2274 return sqxtn(vformdst, dst, shifted_src);
2275 }
2276
2277 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2278 const LogicVRegister& src, int shift) {
2279 SimVRegister temp;
2280 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2281 VectorFormat vformdst = vform;
2282 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2283 return sqxtn(vformdst, dst, shifted_src);
2284 }
2285
2286 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2287 const LogicVRegister& src, int shift) {
2288 SimVRegister temp;
2289 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2290 VectorFormat vformdst = vform;
2291 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2292 return sqxtun(vformdst, dst, shifted_src);
2293 }
2294
2295 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2296 const LogicVRegister& src, int shift) {
2297 SimVRegister temp;
2298 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2299 VectorFormat vformdst = vform;
2300 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2301 return sqxtun(vformdst, dst, shifted_src);
2302 }
2303
2304 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2305 const LogicVRegister& src, int shift) {
2306 SimVRegister temp;
2307 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2308 VectorFormat vformdst = vform;
2309 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2310 return sqxtun(vformdst, dst, shifted_src);
2311 }
2312
2313 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2314 const LogicVRegister& src, int shift) {
2315 SimVRegister temp;
2316 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2317 VectorFormat vformdst = vform;
2318 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2319 return sqxtun(vformdst, dst, shifted_src);
2320 }
2321
2322 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2323 const LogicVRegister& src1,
2324 const LogicVRegister& src2) {
2325 SimVRegister temp1, temp2;
2326 uxtl(vform, temp1, src1);
2327 uxtl(vform, temp2, src2);
2328 add(vform, dst, temp1, temp2);
2329 return dst;
2330 }
2331
2332 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2333 const LogicVRegister& src1,
2334 const LogicVRegister& src2) {
2335 SimVRegister temp1, temp2;
2336 uxtl2(vform, temp1, src1);
2337 uxtl2(vform, temp2, src2);
2338 add(vform, dst, temp1, temp2);
2339 return dst;
2340 }
2341
2342 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2343 const LogicVRegister& src1,
2344 const LogicVRegister& src2) {
2345 SimVRegister temp;
2346 uxtl(vform, temp, src2);
2347 add(vform, dst, src1, temp);
2348 return dst;
2349 }
2350
2351 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2352 const LogicVRegister& src1,
2353 const LogicVRegister& src2) {
2354 SimVRegister temp;
2355 uxtl2(vform, temp, src2);
2356 add(vform, dst, src1, temp);
2357 return dst;
2358 }
2359
2360 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2361 const LogicVRegister& src1,
2362 const LogicVRegister& src2) {
2363 SimVRegister temp1, temp2;
2364 sxtl(vform, temp1, src1);
2365 sxtl(vform, temp2, src2);
2366 add(vform, dst, temp1, temp2);
2367 return dst;
2368 }
2369
2370 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2371 const LogicVRegister& src1,
2372 const LogicVRegister& src2) {
2373 SimVRegister temp1, temp2;
2374 sxtl2(vform, temp1, src1);
2375 sxtl2(vform, temp2, src2);
2376 add(vform, dst, temp1, temp2);
2377 return dst;
2378 }
2379
2380 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2381 const LogicVRegister& src1,
2382 const LogicVRegister& src2) {
2383 SimVRegister temp;
2384 sxtl(vform, temp, src2);
2385 add(vform, dst, src1, temp);
2386 return dst;
2387 }
2388
2389 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2390 const LogicVRegister& src1,
2391 const LogicVRegister& src2) {
2392 SimVRegister temp;
2393 sxtl2(vform, temp, src2);
2394 add(vform, dst, src1, temp);
2395 return dst;
2396 }
2397
2398 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2399 const LogicVRegister& src1,
2400 const LogicVRegister& src2) {
2401 SimVRegister temp1, temp2;
2402 uxtl(vform, temp1, src1);
2403 uxtl(vform, temp2, src2);
2404 sub(vform, dst, temp1, temp2);
2405 return dst;
2406 }
2407
2408 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2409 const LogicVRegister& src1,
2410 const LogicVRegister& src2) {
2411 SimVRegister temp1, temp2;
2412 uxtl2(vform, temp1, src1);
2413 uxtl2(vform, temp2, src2);
2414 sub(vform, dst, temp1, temp2);
2415 return dst;
2416 }
2417
2418 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2419 const LogicVRegister& src1,
2420 const LogicVRegister& src2) {
2421 SimVRegister temp;
2422 uxtl(vform, temp, src2);
2423 sub(vform, dst, src1, temp);
2424 return dst;
2425 }
2426
2427 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2428 const LogicVRegister& src1,
2429 const LogicVRegister& src2) {
2430 SimVRegister temp;
2431 uxtl2(vform, temp, src2);
2432 sub(vform, dst, src1, temp);
2433 return dst;
2434 }
2435
2436 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2437 const LogicVRegister& src1,
2438 const LogicVRegister& src2) {
2439 SimVRegister temp1, temp2;
2440 sxtl(vform, temp1, src1);
2441 sxtl(vform, temp2, src2);
2442 sub(vform, dst, temp1, temp2);
2443 return dst;
2444 }
2445
2446 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2447 const LogicVRegister& src1,
2448 const LogicVRegister& src2) {
2449 SimVRegister temp1, temp2;
2450 sxtl2(vform, temp1, src1);
2451 sxtl2(vform, temp2, src2);
2452 sub(vform, dst, temp1, temp2);
2453 return dst;
2454 }
2455
2456 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2457 const LogicVRegister& src1,
2458 const LogicVRegister& src2) {
2459 SimVRegister temp;
2460 sxtl(vform, temp, src2);
2461 sub(vform, dst, src1, temp);
2462 return dst;
2463 }
2464
2465 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2466 const LogicVRegister& src1,
2467 const LogicVRegister& src2) {
2468 SimVRegister temp;
2469 sxtl2(vform, temp, src2);
2470 sub(vform, dst, src1, temp);
2471 return dst;
2472 }
2473
2474 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2475 const LogicVRegister& src1,
2476 const LogicVRegister& src2) {
2477 SimVRegister temp1, temp2;
2478 uxtl(vform, temp1, src1);
2479 uxtl(vform, temp2, src2);
2480 uaba(vform, dst, temp1, temp2);
2481 return dst;
2482 }
2483
2484 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2485 const LogicVRegister& src1,
2486 const LogicVRegister& src2) {
2487 SimVRegister temp1, temp2;
2488 uxtl2(vform, temp1, src1);
2489 uxtl2(vform, temp2, src2);
2490 uaba(vform, dst, temp1, temp2);
2491 return dst;
2492 }
2493
2494 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2495 const LogicVRegister& src1,
2496 const LogicVRegister& src2) {
2497 SimVRegister temp1, temp2;
2498 sxtl(vform, temp1, src1);
2499 sxtl(vform, temp2, src2);
2500 saba(vform, dst, temp1, temp2);
2501 return dst;
2502 }
2503
2504 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2505 const LogicVRegister& src1,
2506 const LogicVRegister& src2) {
2507 SimVRegister temp1, temp2;
2508 sxtl2(vform, temp1, src1);
2509 sxtl2(vform, temp2, src2);
2510 saba(vform, dst, temp1, temp2);
2511 return dst;
2512 }
2513
2514 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2515 const LogicVRegister& src1,
2516 const LogicVRegister& src2) {
2517 SimVRegister temp1, temp2;
2518 uxtl(vform, temp1, src1);
2519 uxtl(vform, temp2, src2);
2520 AbsDiff(vform, dst, temp1, temp2, false);
2521 return dst;
2522 }
2523
2524 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2525 const LogicVRegister& src1,
2526 const LogicVRegister& src2) {
2527 SimVRegister temp1, temp2;
2528 uxtl2(vform, temp1, src1);
2529 uxtl2(vform, temp2, src2);
2530 AbsDiff(vform, dst, temp1, temp2, false);
2531 return dst;
2532 }
2533
2534 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2535 const LogicVRegister& src1,
2536 const LogicVRegister& src2) {
2537 SimVRegister temp1, temp2;
2538 sxtl(vform, temp1, src1);
2539 sxtl(vform, temp2, src2);
2540 AbsDiff(vform, dst, temp1, temp2, true);
2541 return dst;
2542 }
2543
2544 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2545 const LogicVRegister& src1,
2546 const LogicVRegister& src2) {
2547 SimVRegister temp1, temp2;
2548 sxtl2(vform, temp1, src1);
2549 sxtl2(vform, temp2, src2);
2550 AbsDiff(vform, dst, temp1, temp2, true);
2551 return dst;
2552 }
2553
2554 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2555 const LogicVRegister& src1,
2556 const LogicVRegister& src2) {
2557 SimVRegister temp1, temp2;
2558 uxtl(vform, temp1, src1);
2559 uxtl(vform, temp2, src2);
2560 mul(vform, dst, temp1, temp2);
2561 return dst;
2562 }
2563
2564 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2565 const LogicVRegister& src1,
2566 const LogicVRegister& src2) {
2567 SimVRegister temp1, temp2;
2568 uxtl2(vform, temp1, src1);
2569 uxtl2(vform, temp2, src2);
2570 mul(vform, dst, temp1, temp2);
2571 return dst;
2572 }
2573
2574 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2575 const LogicVRegister& src1,
2576 const LogicVRegister& src2) {
2577 SimVRegister temp1, temp2;
2578 sxtl(vform, temp1, src1);
2579 sxtl(vform, temp2, src2);
2580 mul(vform, dst, temp1, temp2);
2581 return dst;
2582 }
2583
2584 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2585 const LogicVRegister& src1,
2586 const LogicVRegister& src2) {
2587 SimVRegister temp1, temp2;
2588 sxtl2(vform, temp1, src1);
2589 sxtl2(vform, temp2, src2);
2590 mul(vform, dst, temp1, temp2);
2591 return dst;
2592 }
2593
2594 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2595 const LogicVRegister& src1,
2596 const LogicVRegister& src2) {
2597 SimVRegister temp1, temp2;
2598 uxtl(vform, temp1, src1);
2599 uxtl(vform, temp2, src2);
2600 mls(vform, dst, temp1, temp2);
2601 return dst;
2602 }
2603
2604 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2605 const LogicVRegister& src1,
2606 const LogicVRegister& src2) {
2607 SimVRegister temp1, temp2;
2608 uxtl2(vform, temp1, src1);
2609 uxtl2(vform, temp2, src2);
2610 mls(vform, dst, temp1, temp2);
2611 return dst;
2612 }
2613
2614 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2615 const LogicVRegister& src1,
2616 const LogicVRegister& src2) {
2617 SimVRegister temp1, temp2;
2618 sxtl(vform, temp1, src1);
2619 sxtl(vform, temp2, src2);
2620 mls(vform, dst, temp1, temp2);
2621 return dst;
2622 }
2623
2624 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2625 const LogicVRegister& src1,
2626 const LogicVRegister& src2) {
2627 SimVRegister temp1, temp2;
2628 sxtl2(vform, temp1, src1);
2629 sxtl2(vform, temp2, src2);
2630 mls(vform, dst, temp1, temp2);
2631 return dst;
2632 }
2633
2634 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2635 const LogicVRegister& src1,
2636 const LogicVRegister& src2) {
2637 SimVRegister temp1, temp2;
2638 uxtl(vform, temp1, src1);
2639 uxtl(vform, temp2, src2);
2640 mla(vform, dst, temp1, temp2);
2641 return dst;
2642 }
2643
2644 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2645 const LogicVRegister& src1,
2646 const LogicVRegister& src2) {
2647 SimVRegister temp1, temp2;
2648 uxtl2(vform, temp1, src1);
2649 uxtl2(vform, temp2, src2);
2650 mla(vform, dst, temp1, temp2);
2651 return dst;
2652 }
2653
2654 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2655 const LogicVRegister& src1,
2656 const LogicVRegister& src2) {
2657 SimVRegister temp1, temp2;
2658 sxtl(vform, temp1, src1);
2659 sxtl(vform, temp2, src2);
2660 mla(vform, dst, temp1, temp2);
2661 return dst;
2662 }
2663
2664 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2665 const LogicVRegister& src1,
2666 const LogicVRegister& src2) {
2667 SimVRegister temp1, temp2;
2668 sxtl2(vform, temp1, src1);
2669 sxtl2(vform, temp2, src2);
2670 mla(vform, dst, temp1, temp2);
2671 return dst;
2672 }
2673
2674 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2675 const LogicVRegister& src1,
2676 const LogicVRegister& src2) {
2677 SimVRegister temp;
2678 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2679 return add(vform, dst, dst, product).SignedSaturate(vform);
2680 }
2681
2682 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2683 const LogicVRegister& src1,
2684 const LogicVRegister& src2) {
2685 SimVRegister temp;
2686 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2687 return add(vform, dst, dst, product).SignedSaturate(vform);
2688 }
2689
2690 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2691 const LogicVRegister& src1,
2692 const LogicVRegister& src2) {
2693 SimVRegister temp;
2694 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2695 return sub(vform, dst, dst, product).SignedSaturate(vform);
2696 }
2697
2698 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2699 const LogicVRegister& src1,
2700 const LogicVRegister& src2) {
2701 SimVRegister temp;
2702 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2703 return sub(vform, dst, dst, product).SignedSaturate(vform);
2704 }
2705
2706 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2707 const LogicVRegister& src1,
2708 const LogicVRegister& src2) {
2709 SimVRegister temp;
2710 LogicVRegister product = smull(vform, temp, src1, src2);
2711 return add(vform, dst, product, product).SignedSaturate(vform);
2712 }
2713
2714 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2715 const LogicVRegister& src1,
2716 const LogicVRegister& src2) {
2717 SimVRegister temp;
2718 LogicVRegister product = smull2(vform, temp, src1, src2);
2719 return add(vform, dst, product, product).SignedSaturate(vform);
2720 }
2721
2722 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2723 const LogicVRegister& src1,
2724 const LogicVRegister& src2, bool round) {
2725 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2726 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2727 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2728
2729 int esize = LaneSizeInBitsFromFormat(vform);
2730 int round_const = round ? (1 << (esize - 2)) : 0;
2731 int64_t product;
2732
2733 dst.ClearForWrite(vform);
2734 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2735 product = src1.Int(vform, i) * src2.Int(vform, i);
2736 product += round_const;
2737 product = product >> (esize - 1);
2738
2739 if (product > MaxIntFromFormat(vform)) {
2740 product = MaxIntFromFormat(vform);
2741 } else if (product < MinIntFromFormat(vform)) {
2742 product = MinIntFromFormat(vform);
2743 }
2744 dst.SetInt(vform, i, product);
2745 }
2746 return dst;
2747 }
2748
2749 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2750 const LogicVRegister& src1,
2751 const LogicVRegister& src2) {
2752 return sqrdmulh(vform, dst, src1, src2, false);
2753 }
2754
2755 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2756 const LogicVRegister& src1,
2757 const LogicVRegister& src2) {
2758 SimVRegister temp;
2759 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2760 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2761 return dst;
2762 }
2763
2764 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2765 const LogicVRegister& src1,
2766 const LogicVRegister& src2) {
2767 SimVRegister temp;
2768 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2769 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2770 return dst;
2771 }
2772
2773 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2774 const LogicVRegister& src1,
2775 const LogicVRegister& src2) {
2776 SimVRegister temp;
2777 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2778 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2779 return dst;
2780 }
2781
2782 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2783 const LogicVRegister& src1,
2784 const LogicVRegister& src2) {
2785 SimVRegister temp;
2786 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2787 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2788 return dst;
2789 }
2790
2791 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2792 const LogicVRegister& src1,
2793 const LogicVRegister& src2) {
2794 SimVRegister temp;
2795 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2796 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2797 return dst;
2798 }
2799
2800 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2801 const LogicVRegister& src1,
2802 const LogicVRegister& src2) {
2803 SimVRegister temp;
2804 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2805 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2806 return dst;
2807 }
2808
2809 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2810 const LogicVRegister& src1,
2811 const LogicVRegister& src2) {
2812 SimVRegister temp;
2813 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2814 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2815 return dst;
2816 }
2817
2818 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2819 const LogicVRegister& src1,
2820 const LogicVRegister& src2) {
2821 SimVRegister temp;
2822 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2823 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2824 return dst;
2825 }
2826
2827 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2828 const LogicVRegister& src1,
2829 const LogicVRegister& src2) {
2830 uint64_t result[16];
2831 int laneCount = LaneCountFromFormat(vform);
2832 int pairs = laneCount / 2;
2833 for (int i = 0; i < pairs; ++i) {
2834 result[2 * i] = src1.Uint(vform, 2 * i);
2835 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2836 }
2837
2838 dst.SetUintArray(vform, result);
2839 return dst;
2840 }
2841
2842 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2843 const LogicVRegister& src1,
2844 const LogicVRegister& src2) {
2845 uint64_t result[16];
2846 int laneCount = LaneCountFromFormat(vform);
2847 int pairs = laneCount / 2;
2848 for (int i = 0; i < pairs; ++i) {
2849 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2850 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2851 }
2852
2853 dst.SetUintArray(vform, result);
2854 return dst;
2855 }
2856
2857 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2858 const LogicVRegister& src1,
2859 const LogicVRegister& src2) {
2860 uint64_t result[16];
2861 int laneCount = LaneCountFromFormat(vform);
2862 int pairs = laneCount / 2;
2863 for (int i = 0; i < pairs; ++i) {
2864 result[2 * i] = src1.Uint(vform, i);
2865 result[(2 * i) + 1] = src2.Uint(vform, i);
2866 }
2867
2868 dst.SetUintArray(vform, result);
2869 return dst;
2870 }
2871
2872 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2873 const LogicVRegister& src1,
2874 const LogicVRegister& src2) {
2875 uint64_t result[16];
2876 int laneCount = LaneCountFromFormat(vform);
2877 int pairs = laneCount / 2;
2878 for (int i = 0; i < pairs; ++i) {
2879 result[2 * i] = src1.Uint(vform, pairs + i);
2880 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2881 }
2882
2883 dst.SetUintArray(vform, result);
2884 return dst;
2885 }
2886
2887 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2888 const LogicVRegister& src1,
2889 const LogicVRegister& src2) {
2890 uint64_t result[32];
2891 int laneCount = LaneCountFromFormat(vform);
2892 for (int i = 0; i < laneCount; ++i) {
2893 result[i] = src1.Uint(vform, i);
2894 result[laneCount + i] = src2.Uint(vform, i);
2895 }
2896
2897 dst.ClearForWrite(vform);
2898 for (int i = 0; i < laneCount; ++i) {
2899 dst.SetUint(vform, i, result[2 * i]);
2900 }
2901 return dst;
2902 }
2903
2904 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2905 const LogicVRegister& src1,
2906 const LogicVRegister& src2) {
2907 uint64_t result[32];
2908 int laneCount = LaneCountFromFormat(vform);
2909 for (int i = 0; i < laneCount; ++i) {
2910 result[i] = src1.Uint(vform, i);
2911 result[laneCount + i] = src2.Uint(vform, i);
2912 }
2913
2914 dst.ClearForWrite(vform);
2915 for (int i = 0; i < laneCount; ++i) {
2916 dst.SetUint(vform, i, result[(2 * i) + 1]);
2917 }
2918 return dst;
2919 }
2920
2921 template <typename T>
2922 T Simulator::FPAdd(T op1, T op2) {
2923 T result = FPProcessNaNs(op1, op2);
2924 if (std::isnan(result)) return result;
2925
2926 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2927 // inf + -inf returns the default NaN.
2928 FPProcessException();
2929 return FPDefaultNaN<T>();
2930 } else {
2931 // Other cases should be handled by standard arithmetic.
2932 return op1 + op2;
2933 }
2934 }
2935
2936 template <typename T>
2937 T Simulator::FPSub(T op1, T op2) {
2938 // NaNs should be handled elsewhere.
2939 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2940
2941 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2942 // inf - inf returns the default NaN.
2943 FPProcessException();
2944 return FPDefaultNaN<T>();
2945 } else {
2946 // Other cases should be handled by standard arithmetic.
2947 return op1 - op2;
2948 }
2949 }
2950
2951 template <typename T>
2952 T Simulator::FPMul(T op1, T op2) {
2953 // NaNs should be handled elsewhere.
2954 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2955
2956 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2957 // inf * 0.0 returns the default NaN.
2958 FPProcessException();
2959 return FPDefaultNaN<T>();
2960 } else {
2961 // Other cases should be handled by standard arithmetic.
2962 return op1 * op2;
2963 }
2964 }
2965
2966 template <typename T>
2967 T Simulator::FPMulx(T op1, T op2) {
2968 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2969 // inf * 0.0 returns +/-2.0.
2970 T two = 2.0;
2971 return copysign(1.0, op1) * copysign(1.0, op2) * two;
2972 }
2973 return FPMul(op1, op2);
2974 }
2975
2976 template <typename T>
2977 T Simulator::FPMulAdd(T a, T op1, T op2) {
2978 T result = FPProcessNaNs3(a, op1, op2);
2979
2980 T sign_a = copysign(1.0, a);
2981 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
2982 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2983 bool operation_generates_nan =
2984 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
2985 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
2986 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
2987
2988 if (std::isnan(result)) {
2989 // Generated NaNs override quiet NaNs propagated from a.
2990 if (operation_generates_nan && IsQuietNaN(a)) {
2991 FPProcessException();
2992 return FPDefaultNaN<T>();
2993 } else {
2994 return result;
2995 }
2996 }
2997
2998 // If the operation would produce a NaN, return the default NaN.
2999 if (operation_generates_nan) {
3000 FPProcessException();
3001 return FPDefaultNaN<T>();
3002 }
3003
3004 // Work around broken fma implementations for exact zero results: The sign of
3005 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3006 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3007 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3008 }
3009
3010 result = FusedMultiplyAdd(op1, op2, a);
3011 DCHECK(!std::isnan(result));
3012
3013 // Work around broken fma implementations for rounded zero results: If a is
3014 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3015 if ((a == 0.0) && (result == 0.0)) {
3016 return copysign(0.0, sign_prod);
3017 }
3018
3019 return result;
3020 }
3021
3022 template <typename T>
3023 T Simulator::FPDiv(T op1, T op2) {
3024 // NaNs should be handled elsewhere.
3025 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3026
3027 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3028 // inf / inf and 0.0 / 0.0 return the default NaN.
3029 FPProcessException();
3030 return FPDefaultNaN<T>();
3031 } else {
3032 if (op2 == 0.0) {
3033 FPProcessException();
3034 if (!std::isnan(op1)) {
3035 double op1_sign = copysign(1.0, op1);
3036 double op2_sign = copysign(1.0, op2);
3037 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3038 }
3039 }
3040
3041 // Other cases should be handled by standard arithmetic.
3042 return op1 / op2;
3043 }
3044 }
3045
3046 template <typename T>
3047 T Simulator::FPSqrt(T op) {
3048 if (std::isnan(op)) {
3049 return FPProcessNaN(op);
3050 } else if (op < 0.0) {
3051 FPProcessException();
3052 return FPDefaultNaN<T>();
3053 } else {
3054 return sqrt(op);
3055 }
3056 }
3057
3058 template <typename T>
3059 T Simulator::FPMax(T a, T b) {
3060 T result = FPProcessNaNs(a, b);
3061 if (std::isnan(result)) return result;
3062
3063 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3064 // a and b are zero, and the sign differs: return +0.0.
3065 return 0.0;
3066 } else {
3067 return (a > b) ? a : b;
3068 }
3069 }
3070
3071 template <typename T>
3072 T Simulator::FPMaxNM(T a, T b) {
3073 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3074 a = kFP64NegativeInfinity;
3075 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3076 b = kFP64NegativeInfinity;
3077 }
3078
3079 T result = FPProcessNaNs(a, b);
3080 return std::isnan(result) ? result : FPMax(a, b);
3081 }
3082
3083 template <typename T>
3084 T Simulator::FPMin(T a, T b) {
3085 T result = FPProcessNaNs(a, b);
3086 if (std::isnan(result)) return result;
3087
3088 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3089 // a and b are zero, and the sign differs: return -0.0.
3090 return -0.0;
3091 } else {
3092 return (a < b) ? a : b;
3093 }
3094 }
3095
3096 template <typename T>
3097 T Simulator::FPMinNM(T a, T b) {
3098 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3099 a = kFP64PositiveInfinity;
3100 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3101 b = kFP64PositiveInfinity;
3102 }
3103
3104 T result = FPProcessNaNs(a, b);
3105 return std::isnan(result) ? result : FPMin(a, b);
3106 }
3107
3108 template <typename T>
3109 T Simulator::FPRecipStepFused(T op1, T op2) {
3110 const T two = 2.0;
3111 if ((std::isinf(op1) && (op2 == 0.0)) ||
3112 ((op1 == 0.0) && (std::isinf(op2)))) {
3113 return two;
3114 } else if (std::isinf(op1) || std::isinf(op2)) {
3115 // Return +inf if signs match, otherwise -inf.
3116 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3117 : kFP64NegativeInfinity;
3118 } else {
3119 return FusedMultiplyAdd(op1, op2, two);
3120 }
3121 }
3122
3123 template <typename T>
3124 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3125 const T one_point_five = 1.5;
3126 const T two = 2.0;
3127
3128 if ((std::isinf(op1) && (op2 == 0.0)) ||
3129 ((op1 == 0.0) && (std::isinf(op2)))) {
3130 return one_point_five;
3131 } else if (std::isinf(op1) || std::isinf(op2)) {
3132 // Return +inf if signs match, otherwise -inf.
3133 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3134 : kFP64NegativeInfinity;
3135 } else {
3136 // The multiply-add-halve operation must be fully fused, so avoid interim
3137 // rounding by checking which operand can be losslessly divided by two
3138 // before doing the multiply-add.
3139 if (std::isnormal(op1 / two)) {
3140 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3141 } else if (std::isnormal(op2 / two)) {
3142 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3143 } else {
3144 // Neither operand is normal after halving: the result is dominated by
3145 // the addition term, so just return that.
3146 return one_point_five;
3147 }
3148 }
3149 }
3150
3151 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3152 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3153 (value == kFP64NegativeInfinity)) {
3154 return value;
3155 } else if (std::isnan(value)) {
3156 return FPProcessNaN(value);
3157 }
3158
3159 double int_result = std::floor(value);
3160 double error = value - int_result;
3161 switch (round_mode) {
3162 case FPTieAway: {
3163 // Take care of correctly handling the range ]-0.5, -0.0], which must
3164 // yield -0.0.
3165 if ((-0.5 < value) && (value < 0.0)) {
3166 int_result = -0.0;
3167
3168 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3169 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3170 // result is positive, round up.
3171 int_result++;
3172 }
3173 break;
3174 }
3175 case FPTieEven: {
3176 // Take care of correctly handling the range [-0.5, -0.0], which must
3177 // yield -0.0.
3178 if ((-0.5 <= value) && (value < 0.0)) {
3179 int_result = -0.0;
3180
3181 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3182 // result is odd, round up.
3183 } else if ((error > 0.5) ||
3184 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3185 int_result++;
3186 }
3187 break;
3188 }
3189 case FPZero: {
3190 // If value>0 then we take floor(value)
3191 // otherwise, ceil(value).
3192 if (value < 0) {
3193 int_result = ceil(value);
3194 }
3195 break;
3196 }
3197 case FPNegativeInfinity: {
3198 // We always use floor(value).
3199 break;
3200 }
3201 case FPPositiveInfinity: {
3202 // Take care of correctly handling the range ]-1.0, -0.0], which must
3203 // yield -0.0.
3204 if ((-1.0 < value) && (value < 0.0)) {
3205 int_result = -0.0;
3206
3207 // If the error is non-zero, round up.
3208 } else if (error > 0.0) {
3209 int_result++;
3210 }
3211 break;
3212 }
3213 default:
3214 UNIMPLEMENTED();
3215 }
3216 return int_result;
3217 }
3218
3219 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3220 value = FPRoundInt(value, rmode);
3221 if (value >= kWMaxInt) {
3222 return kWMaxInt;
3223 } else if (value < kWMinInt) {
3224 return kWMinInt;
3225 }
3226 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3227 }
3228
3229 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3230 value = FPRoundInt(value, rmode);
3231 if (value >= kXMaxInt) {
3232 return kXMaxInt;
3233 } else if (value < kXMinInt) {
3234 return kXMinInt;
3235 }
3236 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3237 }
3238
3239 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3240 value = FPRoundInt(value, rmode);
3241 if (value >= kWMaxUInt) {
3242 return kWMaxUInt;
3243 } else if (value < 0.0) {
3244 return 0;
3245 }
3246 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3247 }
3248
3249 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3250 value = FPRoundInt(value, rmode);
3251 if (value >= kXMaxUInt) {
3252 return kXMaxUInt;
3253 } else if (value < 0.0) {
3254 return 0;
3255 }
3256 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3257 }
3258
3259 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
3260 template <typename T> \
3261 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3262 const LogicVRegister& src1, \
3263 const LogicVRegister& src2) { \
3264 dst.ClearForWrite(vform); \
3265 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
3266 T op1 = src1.Float<T>(i); \
3267 T op2 = src2.Float<T>(i); \
3268 T result; \
3269 if (PROCNAN) { \
3270 result = FPProcessNaNs(op1, op2); \
3271 if (!std::isnan(result)) { \
3272 result = OP(op1, op2); \
3273 } \
3274 } else { \
3275 result = OP(op1, op2); \
3276 } \
3277 dst.SetFloat(i, result); \
3278 } \
3279 return dst; \
3280 } \
3281 \
3282 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3283 const LogicVRegister& src1, \
3284 const LogicVRegister& src2) { \
3285 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \
3286 FN<float>(vform, dst, src1, src2); \
3287 } else { \
3288 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \
3289 FN<double>(vform, dst, src1, src2); \
3290 } \
3291 return dst; \
3292 }
3293 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3294 #undef DEFINE_NEON_FP_VECTOR_OP
3295
3296 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3297 const LogicVRegister& src1,
3298 const LogicVRegister& src2) {
3299 SimVRegister temp;
3300 LogicVRegister product = fmul(vform, temp, src1, src2);
3301 return fneg(vform, dst, product);
3302 }
3303
3304 template <typename T>
3305 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3306 const LogicVRegister& src1,
3307 const LogicVRegister& src2) {
3308 dst.ClearForWrite(vform);
3309 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3310 T op1 = -src1.Float<T>(i);
3311 T op2 = src2.Float<T>(i);
3312 T result = FPProcessNaNs(op1, op2);
3313 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3314 }
3315 return dst;
3316 }
3317
3318 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3319 const LogicVRegister& src1,
3320 const LogicVRegister& src2) {
3321 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3322 frecps<float>(vform, dst, src1, src2);
3323 } else {
3324 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3325 frecps<double>(vform, dst, src1, src2);
3326 }
3327 return dst;
3328 }
3329
3330 template <typename T>
3331 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3332 const LogicVRegister& src1,
3333 const LogicVRegister& src2) {
3334 dst.ClearForWrite(vform);
3335 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3336 T op1 = -src1.Float<T>(i);
3337 T op2 = src2.Float<T>(i);
3338 T result = FPProcessNaNs(op1, op2);
3339 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3340 }
3341 return dst;
3342 }
3343
3344 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3345 const LogicVRegister& src1,
3346 const LogicVRegister& src2) {
3347 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3348 frsqrts<float>(vform, dst, src1, src2);
3349 } else {
3350 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3351 frsqrts<double>(vform, dst, src1, src2);
3352 }
3353 return dst;
3354 }
3355
3356 template <typename T>
3357 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3358 const LogicVRegister& src1,
3359 const LogicVRegister& src2, Condition cond) {
3360 dst.ClearForWrite(vform);
3361 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3362 bool result = false;
3363 T op1 = src1.Float<T>(i);
3364 T op2 = src2.Float<T>(i);
3365 T nan_result = FPProcessNaNs(op1, op2);
3366 if (!std::isnan(nan_result)) {
3367 switch (cond) {
3368 case eq:
3369 result = (op1 == op2);
3370 break;
3371 case ge:
3372 result = (op1 >= op2);
3373 break;
3374 case gt:
3375 result = (op1 > op2);
3376 break;
3377 case le:
3378 result = (op1 <= op2);
3379 break;
3380 case lt:
3381 result = (op1 < op2);
3382 break;
3383 default:
3384 UNREACHABLE();
3385 }
3386 }
3387 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3388 }
3389 return dst;
3390 }
3391
3392 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3393 const LogicVRegister& src1,
3394 const LogicVRegister& src2, Condition cond) {
3395 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3396 fcmp<float>(vform, dst, src1, src2, cond);
3397 } else {
3398 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3399 fcmp<double>(vform, dst, src1, src2, cond);
3400 }
3401 return dst;
3402 }
3403
3404 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3405 const LogicVRegister& src, Condition cond) {
3406 SimVRegister temp;
3407 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3408 LogicVRegister zero_reg =
3409 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3410 fcmp<float>(vform, dst, src, zero_reg, cond);
3411 } else {
3412 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3413 LogicVRegister zero_reg =
3414 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3415 fcmp<double>(vform, dst, src, zero_reg, cond);
3416 }
3417 return dst;
3418 }
3419
3420 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3421 const LogicVRegister& src1,
3422 const LogicVRegister& src2, Condition cond) {
3423 SimVRegister temp1, temp2;
3424 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3425 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3426 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3427 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3428 } else {
3429 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3430 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3431 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3432 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3433 }
3434 return dst;
3435 }
3436
3437 template <typename T>
3438 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3439 const LogicVRegister& src1,
3440 const LogicVRegister& src2) {
3441 dst.ClearForWrite(vform);
3442 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3443 T op1 = src1.Float<T>(i);
3444 T op2 = src2.Float<T>(i);
3445 T acc = dst.Float<T>(i);
3446 T result = FPMulAdd(acc, op1, op2);
3447 dst.SetFloat(i, result);
3448 }
3449 return dst;
3450 }
3451
3452 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3453 const LogicVRegister& src1,
3454 const LogicVRegister& src2) {
3455 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3456 fmla<float>(vform, dst, src1, src2);
3457 } else {
3458 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3459 fmla<double>(vform, dst, src1, src2);
3460 }
3461 return dst;
3462 }
3463
3464 template <typename T>
3465 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3466 const LogicVRegister& src1,
3467 const LogicVRegister& src2) {
3468 dst.ClearForWrite(vform);
3469 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3470 T op1 = -src1.Float<T>(i);
3471 T op2 = src2.Float<T>(i);
3472 T acc = dst.Float<T>(i);
3473 T result = FPMulAdd(acc, op1, op2);
3474 dst.SetFloat(i, result);
3475 }
3476 return dst;
3477 }
3478
3479 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3480 const LogicVRegister& src1,
3481 const LogicVRegister& src2) {
3482 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3483 fmls<float>(vform, dst, src1, src2);
3484 } else {
3485 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3486 fmls<double>(vform, dst, src1, src2);
3487 }
3488 return dst;
3489 }
3490
3491 template <typename T>
3492 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3493 const LogicVRegister& src) {
3494 dst.ClearForWrite(vform);
3495 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3496 T op = src.Float<T>(i);
3497 op = -op;
3498 dst.SetFloat(i, op);
3499 }
3500 return dst;
3501 }
3502
3503 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3504 const LogicVRegister& src) {
3505 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3506 fneg<float>(vform, dst, src);
3507 } else {
3508 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3509 fneg<double>(vform, dst, src);
3510 }
3511 return dst;
3512 }
3513
3514 template <typename T>
3515 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3516 const LogicVRegister& src) {
3517 dst.ClearForWrite(vform);
3518 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3519 T op = src.Float<T>(i);
3520 if (copysign(1.0, op) < 0.0) {
3521 op = -op;
3522 }
3523 dst.SetFloat(i, op);
3524 }
3525 return dst;
3526 }
3527
3528 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3529 const LogicVRegister& src) {
3530 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3531 fabs_<float>(vform, dst, src);
3532 } else {
3533 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3534 fabs_<double>(vform, dst, src);
3535 }
3536 return dst;
3537 }
3538
3539 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3540 const LogicVRegister& src1,
3541 const LogicVRegister& src2) {
3542 SimVRegister temp;
3543 fsub(vform, temp, src1, src2);
3544 fabs_(vform, dst, temp);
3545 return dst;
3546 }
3547
3548 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3549 const LogicVRegister& src) {
3550 dst.ClearForWrite(vform);
3551 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3552 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3553 float result = FPSqrt(src.Float<float>(i));
3554 dst.SetFloat(i, result);
3555 }
3556 } else {
3557 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3558 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3559 double result = FPSqrt(src.Float<double>(i));
3560 dst.SetFloat(i, result);
3561 }
3562 }
3563 return dst;
3564 }
3565
3566 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
3567 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3568 const LogicVRegister& src1, \
3569 const LogicVRegister& src2) { \
3570 SimVRegister temp1, temp2; \
3571 uzp1(vform, temp1, src1, src2); \
3572 uzp2(vform, temp2, src1, src2); \
3573 FN(vform, dst, temp1, temp2); \
3574 return dst; \
3575 } \
3576 \
3577 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3578 const LogicVRegister& src) { \
3579 if (vform == kFormatS) { \
3580 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
3581 dst.SetFloat(0, result); \
3582 } else { \
3583 DCHECK_EQ(vform, kFormatD); \
3584 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
3585 dst.SetFloat(0, result); \
3586 } \
3587 dst.ClearForWrite(vform); \
3588 return dst; \
3589 }
3590 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3591 #undef DEFINE_NEON_FP_PAIR_OP
3592
3593 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3594 const LogicVRegister& src, FPMinMaxOp Op) {
3595 DCHECK_EQ(vform, kFormat4S);
3596 USE(vform);
3597 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3598 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3599 float result = (this->*Op)(result1, result2);
3600 dst.ClearForWrite(kFormatS);
3601 dst.SetFloat<float>(0, result);
3602 return dst;
3603 }
3604
3605 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3606 const LogicVRegister& src) {
3607 return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3608 }
3609
3610 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3611 const LogicVRegister& src) {
3612 return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3613 }
3614
3615 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3616 const LogicVRegister& src) {
3617 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3618 }
3619
3620 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3621 const LogicVRegister& src) {
3622 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3623 }
3624
3625 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3626 const LogicVRegister& src1,
3627 const LogicVRegister& src2, int index) {
3628 dst.ClearForWrite(vform);
3629 SimVRegister temp;
3630 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3631 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3632 fmul<float>(vform, dst, src1, index_reg);
3633 } else {
3634 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3635 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3636 fmul<double>(vform, dst, src1, index_reg);
3637 }
3638 return dst;
3639 }
3640
3641 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3642 const LogicVRegister& src1,
3643 const LogicVRegister& src2, int index) {
3644 dst.ClearForWrite(vform);
3645 SimVRegister temp;
3646 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3647 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3648 fmla<float>(vform, dst, src1, index_reg);
3649 } else {
3650 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3651 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3652 fmla<double>(vform, dst, src1, index_reg);
3653 }
3654 return dst;
3655 }
3656
3657 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3658 const LogicVRegister& src1,
3659 const LogicVRegister& src2, int index) {
3660 dst.ClearForWrite(vform);
3661 SimVRegister temp;
3662 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3663 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3664 fmls<float>(vform, dst, src1, index_reg);
3665 } else {
3666 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3667 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3668 fmls<double>(vform, dst, src1, index_reg);
3669 }
3670 return dst;
3671 }
3672
3673 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3674 const LogicVRegister& src1,
3675 const LogicVRegister& src2, int index) {
3676 dst.ClearForWrite(vform);
3677 SimVRegister temp;
3678 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3679 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3680 fmulx<float>(vform, dst, src1, index_reg);
3681
3682 } else {
3683 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3684 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3685 fmulx<double>(vform, dst, src1, index_reg);
3686 }
3687 return dst;
3688 }
3689
3690 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3691 const LogicVRegister& src,
3692 FPRounding rounding_mode,
3693 bool inexact_exception) {
3694 dst.ClearForWrite(vform);
3695 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3696 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3697 float input = src.Float<float>(i);
3698 float rounded = FPRoundInt(input, rounding_mode);
3699 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3700 FPProcessException();
3701 }
3702 dst.SetFloat<float>(i, rounded);
3703 }
3704 } else {
3705 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3706 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3707 double input = src.Float<double>(i);
3708 double rounded = FPRoundInt(input, rounding_mode);
3709 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3710 FPProcessException();
3711 }
3712 dst.SetFloat<double>(i, rounded);
3713 }
3714 }
3715 return dst;
3716 }
3717
3718 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3719 const LogicVRegister& src,
3720 FPRounding rounding_mode, int fbits) {
3721 dst.ClearForWrite(vform);
3722 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3723 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3724 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3725 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3726 }
3727 } else {
3728 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3729 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3730 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3731 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3732 }
3733 }
3734 return dst;
3735 }
3736
3737 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3738 const LogicVRegister& src,
3739 FPRounding rounding_mode, int fbits) {
3740 dst.ClearForWrite(vform);
3741 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3742 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3743 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3744 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3745 }
3746 } else {
3747 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3748 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3749 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3750 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3751 }
3752 }
3753 return dst;
3754 }
3755
3756 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3757 const LogicVRegister& src) {
3758 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3759 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3760 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3761 }
3762 } else {
3763 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3764 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3765 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3766 }
3767 }
3768 return dst;
3769 }
3770
3771 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3772 const LogicVRegister& src) {
3773 int lane_count = LaneCountFromFormat(vform);
3774 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3775 for (int i = 0; i < lane_count; i++) {
3776 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3777 }
3778 } else {
3779 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3780 for (int i = 0; i < lane_count; i++) {
3781 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3782 }
3783 }
3784 return dst;
3785 }
3786
3787 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3788 const LogicVRegister& src) {
3789 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3790 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3791 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3792 }
3793 } else {
3794 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3795 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3796 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3797 }
3798 }
3799 return dst;
3800 }
3801
3802 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3803 const LogicVRegister& src) {
3804 int lane_count = LaneCountFromFormat(vform) / 2;
3805 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3806 for (int i = lane_count - 1; i >= 0; i--) {
3807 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3808 }
3809 } else {
3810 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3811 for (int i = lane_count - 1; i >= 0; i--) {
3812 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3813 }
3814 }
3815 return dst;
3816 }
3817
3818 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3819 const LogicVRegister& src) {
3820 dst.ClearForWrite(vform);
3821 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3822 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3823 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3824 }
3825 return dst;
3826 }
3827
3828 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3829 const LogicVRegister& src) {
3830 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3831 int lane_count = LaneCountFromFormat(vform) / 2;
3832 for (int i = lane_count - 1; i >= 0; i--) {
3833 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3834 }
3835 return dst;
3836 }
3837
3838 // Based on reference C function recip_sqrt_estimate from ARM ARM.
3839 double Simulator::recip_sqrt_estimate(double a) {
3840 int q0, q1, s;
3841 double r;
3842 if (a < 0.5) {
3843 q0 = static_cast<int>(a * 512.0);
3844 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3845 } else {
3846 q1 = static_cast<int>(a * 256.0);
3847 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3848 }
3849 s = static_cast<int>(256.0 * r + 0.5);
3850 return static_cast<double>(s) / 256.0;
3851 }
3852
3853 namespace {
3854
3855 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3856 return unsigned_bitextract_64(start_bit, end_bit, val);
3857 }
3858
3859 } // anonymous namespace
3860
3861 template <typename T>
3862 T Simulator::FPRecipSqrtEstimate(T op) {
3863 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3864 "T must be a float or double");
3865
3866 if (std::isnan(op)) {
3867 return FPProcessNaN(op);
3868 } else if (op == 0.0) {
3869 if (copysign(1.0, op) < 0.0) {
3870 return kFP64NegativeInfinity;
3871 } else {
3872 return kFP64PositiveInfinity;
3873 }
3874 } else if (copysign(1.0, op) < 0.0) {
3875 FPProcessException();
3876 return FPDefaultNaN<T>();
3877 } else if (std::isinf(op)) {
3878 return 0.0;
3879 } else {
3880 uint64_t fraction;
3881 int32_t exp, result_exp;
3882
3883 if (sizeof(T) == sizeof(float)) {
3884 exp = static_cast<int32_t>(float_exp(op));
3885 fraction = float_mantissa(op);
3886 fraction <<= 29;
3887 } else {
3888 exp = static_cast<int32_t>(double_exp(op));
3889 fraction = double_mantissa(op);
3890 }
3891
3892 if (exp == 0) {
3893 while (Bits(fraction, 51, 51) == 0) {
3894 fraction = Bits(fraction, 50, 0) << 1;
3895 exp -= 1;
3896 }
3897 fraction = Bits(fraction, 50, 0) << 1;
3898 }
3899
3900 double scaled;
3901 if (Bits(exp, 0, 0) == 0) {
3902 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3903 } else {
3904 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3905 }
3906
3907 if (sizeof(T) == sizeof(float)) {
3908 result_exp = (380 - exp) / 2;
3909 } else {
3910 result_exp = (3068 - exp) / 2;
3911 }
3912
3913 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3914
3915 if (sizeof(T) == sizeof(float)) {
3916 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3917 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3918 return float_pack(0, exp_bits, est_bits);
3919 } else {
3920 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3921 }
3922 }
3923 }
3924
3925 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3926 const LogicVRegister& src) {
3927 dst.ClearForWrite(vform);
3928 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3929 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3930 float input = src.Float<float>(i);
3931 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3932 }
3933 } else {
3934 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3935 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3936 double input = src.Float<double>(i);
3937 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3938 }
3939 }
3940 return dst;
3941 }
3942
3943 template <typename T>
3944 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3945 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3946 "T must be a float or double");
3947 uint32_t sign;
3948
3949 if (sizeof(T) == sizeof(float)) {
3950 sign = float_sign(op);
3951 } else {
3952 sign = double_sign(op);
3953 }
3954
3955 if (std::isnan(op)) {
3956 return FPProcessNaN(op);
3957 } else if (std::isinf(op)) {
3958 return (sign == 1) ? -0.0 : 0.0;
3959 } else if (op == 0.0) {
3960 FPProcessException(); // FPExc_DivideByZero exception.
3961 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3962 } else if (((sizeof(T) == sizeof(float)) &&
3963 (std::fabs(op) < std::pow(2.0, -128.0))) ||
3964 ((sizeof(T) == sizeof(double)) &&
3965 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
3966 bool overflow_to_inf = false;
3967 switch (rounding) {
3968 case FPTieEven:
3969 overflow_to_inf = true;
3970 break;
3971 case FPPositiveInfinity:
3972 overflow_to_inf = (sign == 0);
3973 break;
3974 case FPNegativeInfinity:
3975 overflow_to_inf = (sign == 1);
3976 break;
3977 case FPZero:
3978 overflow_to_inf = false;
3979 break;
3980 default:
3981 break;
3982 }
3983 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
3984 if (overflow_to_inf) {
3985 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3986 } else {
3987 // Return FPMaxNormal(sign).
3988 if (sizeof(T) == sizeof(float)) {
3989 return float_pack(sign, 0xfe, 0x07fffff);
3990 } else {
3991 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
3992 }
3993 }
3994 } else {
3995 uint64_t fraction;
3996 int32_t exp, result_exp;
3997 uint32_t sign;
3998
3999 if (sizeof(T) == sizeof(float)) {
4000 sign = float_sign(op);
4001 exp = static_cast<int32_t>(float_exp(op));
4002 fraction = float_mantissa(op);
4003 fraction <<= 29;
4004 } else {
4005 sign = double_sign(op);
4006 exp = static_cast<int32_t>(double_exp(op));
4007 fraction = double_mantissa(op);
4008 }
4009
4010 if (exp == 0) {
4011 if (Bits(fraction, 51, 51) == 0) {
4012 exp -= 1;
4013 fraction = Bits(fraction, 49, 0) << 2;
4014 } else {
4015 fraction = Bits(fraction, 50, 0) << 1;
4016 }
4017 }
4018
4019 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4020
4021 if (sizeof(T) == sizeof(float)) {
4022 result_exp = 253 - exp;
4023 } else {
4024 result_exp = 2045 - exp;
4025 }
4026
4027 double estimate = recip_estimate(scaled);
4028
4029 fraction = double_mantissa(estimate);
4030 if (result_exp == 0) {
4031 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4032 } else if (result_exp == -1) {
4033 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4034 result_exp = 0;
4035 }
4036 if (sizeof(T) == sizeof(float)) {
4037 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4038 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4039 return float_pack(sign, exp_bits, frac_bits);
4040 } else {
4041 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4042 }
4043 }
4044 }
4045
4046 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4047 const LogicVRegister& src, FPRounding round) {
4048 dst.ClearForWrite(vform);
4049 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4050 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4051 float input = src.Float<float>(i);
4052 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4053 }
4054 } else {
4055 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4056 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4057 double input = src.Float<double>(i);
4058 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4059 }
4060 }
4061 return dst;
4062 }
4063
4064 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4065 const LogicVRegister& src) {
4066 dst.ClearForWrite(vform);
4067 uint64_t operand;
4068 uint32_t result;
4069 double dp_operand, dp_result;
4070 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4071 operand = src.Uint(vform, i);
4072 if (operand <= 0x3FFFFFFF) {
4073 result = 0xFFFFFFFF;
4074 } else {
4075 dp_operand = operand * std::pow(2.0, -32);
4076 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4077 result = static_cast<uint32_t>(dp_result);
4078 }
4079 dst.SetUint(vform, i, result);
4080 }
4081 return dst;
4082 }
4083
4084 // Based on reference C function recip_estimate from ARM ARM.
4085 double Simulator::recip_estimate(double a) {
4086 int q, s;
4087 double r;
4088 q = static_cast<int>(a * 512.0);
4089 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4090 s = static_cast<int>(256.0 * r + 0.5);
4091 return static_cast<double>(s) / 256.0;
4092 }
4093
4094 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4095 const LogicVRegister& src) {
4096 dst.ClearForWrite(vform);
4097 uint64_t operand;
4098 uint32_t result;
4099 double dp_operand, dp_result;
4100 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4101 operand = src.Uint(vform, i);
4102 if (operand <= 0x7FFFFFFF) {
4103 result = 0xFFFFFFFF;
4104 } else {
4105 dp_operand = operand * std::pow(2.0, -32);
4106 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4107 result = static_cast<uint32_t>(dp_result);
4108 }
4109 dst.SetUint(vform, i, result);
4110 }
4111 return dst;
4112 }
4113
4114 template <typename T>
4115 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4116 const LogicVRegister& src) {
4117 dst.ClearForWrite(vform);
4118 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4119 T op = src.Float<T>(i);
4120 T result;
4121 if (std::isnan(op)) {
4122 result = FPProcessNaN(op);
4123 } else {
4124 int exp;
4125 uint32_t sign;
4126 if (sizeof(T) == sizeof(float)) {
4127 sign = float_sign(op);
4128 exp = static_cast<int>(float_exp(op));
4129 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4130 result = float_pack(sign, exp, 0);
4131 } else {
4132 sign = double_sign(op);
4133 exp = static_cast<int>(double_exp(op));
4134 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4135 result = double_pack(sign, exp, 0);
4136 }
4137 }
4138 dst.SetFloat(i, result);
4139 }
4140 return dst;
4141 }
4142
4143 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4144 const LogicVRegister& src) {
4145 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4146 frecpx<float>(vform, dst, src);
4147 } else {
4148 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4149 frecpx<double>(vform, dst, src);
4150 }
4151 return dst;
4152 }
4153
4154 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4155 const LogicVRegister& src, int fbits,
4156 FPRounding round) {
4157 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4158 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4159 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4160 dst.SetFloat<float>(i, result);
4161 } else {
4162 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4163 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4164 dst.SetFloat<double>(i, result);
4165 }
4166 }
4167 return dst;
4168 }
4169
4170 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4171 const LogicVRegister& src, int fbits,
4172 FPRounding round) {
4173 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4174 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4175 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4176 dst.SetFloat<float>(i, result);
4177 } else {
4178 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4179 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4180 dst.SetFloat<double>(i, result);
4181 }
4182 }
4183 return dst;
4184 }
4185
4186 #endif // USE_SIMULATOR
4187
4188 } // namespace internal
4189 } // namespace v8
4190
4191 #endif // V8_TARGET_ARCH_ARM64
OLDNEW
« no previous file with comments | « src/arm64/simulator-arm64.cc ('k') | src/arm64/utils-arm64.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698