Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(251)

Side by Side Diff: src/arm64/simulator-logic-arm64.cc

Issue 2812573003: Reland "ARM64: Add NEON support" (Closed)
Patch Set: Add trace directory to gitignore Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm64/simulator-arm64.cc ('k') | src/arm64/utils-arm64.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include <cmath>
8 #include "src/arm64/simulator-arm64.h"
9
10 namespace v8 {
11 namespace internal {
12
13 #if defined(USE_SIMULATOR)
14
15 namespace {
16
17 // See FPRound for a description of this function.
18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
19 FPRounding round_mode) {
20 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
21 sign, exponent, mantissa, round_mode);
22 return bit_cast<double>(bits);
23 }
24
25 // See FPRound for a description of this function.
26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
27 FPRounding round_mode) {
28 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
29 sign, exponent, mantissa, round_mode);
30 return bit_cast<float>(bits);
31 }
32
33 // See FPRound for a description of this function.
34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
35 uint64_t mantissa, FPRounding round_mode) {
36 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
37 sign, exponent, mantissa, round_mode);
38 }
39
40 } // namespace
41
42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
43 if (src >= 0) {
44 return UFixedToDouble(src, fbits, round);
45 } else if (src == INT64_MIN) {
46 return -UFixedToDouble(src, fbits, round);
47 } else {
48 return -UFixedToDouble(-src, fbits, round);
49 }
50 }
51
52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
53 // An input of 0 is a special case because the result is effectively
54 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
55 if (src == 0) {
56 return 0.0;
57 }
58
59 // Calculate the exponent. The highest significant bit will have the value
60 // 2^exponent.
61 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
62 const int64_t exponent = highest_significant_bit - fbits;
63
64 return FPRoundToDouble(0, exponent, src, round);
65 }
66
67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
68 if (src >= 0) {
69 return UFixedToFloat(src, fbits, round);
70 } else if (src == INT64_MIN) {
71 return -UFixedToFloat(src, fbits, round);
72 } else {
73 return -UFixedToFloat(-src, fbits, round);
74 }
75 }
76
77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
78 // An input of 0 is a special case because the result is effectively
79 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
80 if (src == 0) {
81 return 0.0f;
82 }
83
84 // Calculate the exponent. The highest significant bit will have the value
85 // 2^exponent.
86 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
87 const int32_t exponent = highest_significant_bit - fbits;
88
89 return FPRoundToFloat(0, exponent, src, round);
90 }
91
92 double Simulator::FPToDouble(float value) {
93 switch (std::fpclassify(value)) {
94 case FP_NAN: {
95 if (IsSignallingNaN(value)) {
96 FPProcessException();
97 }
98 if (DN()) return kFP64DefaultNaN;
99
100 // Convert NaNs as the processor would:
101 // - The sign is propagated.
102 // - The mantissa is transferred entirely, except that the top bit is
103 // forced to '1', making the result a quiet NaN. The unused (low-order)
104 // mantissa bits are set to 0.
105 uint32_t raw = bit_cast<uint32_t>(value);
106
107 uint64_t sign = raw >> 31;
108 uint64_t exponent = (1 << kDoubleExponentBits) - 1;
109 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
110
111 // Unused low-order bits remain zero.
112 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
113
114 // Force a quiet NaN.
115 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
116
117 return double_pack(sign, exponent, mantissa);
118 }
119
120 case FP_ZERO:
121 case FP_NORMAL:
122 case FP_SUBNORMAL:
123 case FP_INFINITE: {
124 // All other inputs are preserved in a standard cast, because every value
125 // representable using an IEEE-754 float is also representable using an
126 // IEEE-754 double.
127 return static_cast<double>(value);
128 }
129 }
130
131 UNREACHABLE();
132 return kFP64DefaultNaN;
133 }
134
135 float Simulator::FPToFloat(float16 value) {
136 uint32_t sign = value >> 15;
137 uint32_t exponent =
138 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
139 kFloat16MantissaBits, value);
140 uint32_t mantissa =
141 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
142
143 switch (float16classify(value)) {
144 case FP_ZERO:
145 return (sign == 0) ? 0.0f : -0.0f;
146
147 case FP_INFINITE:
148 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
149
150 case FP_SUBNORMAL: {
151 // Calculate shift required to put mantissa into the most-significant bits
152 // of the destination mantissa.
153 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
154
155 // Shift mantissa and discard implicit '1'.
156 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
157 mantissa &= (1 << kFloatMantissaBits) - 1;
158
159 // Adjust the exponent for the shift applied, and rebias.
160 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
161 break;
162 }
163
164 case FP_NAN: {
165 if (IsSignallingNaN(value)) {
166 FPProcessException();
167 }
168 if (DN()) return kFP32DefaultNaN;
169
170 // Convert NaNs as the processor would:
171 // - The sign is propagated.
172 // - The mantissa is transferred entirely, except that the top bit is
173 // forced to '1', making the result a quiet NaN. The unused (low-order)
174 // mantissa bits are set to 0.
175 exponent = (1 << kFloatExponentBits) - 1;
176
177 // Increase bits in mantissa, making low-order bits 0.
178 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
179 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.
180 break;
181 }
182
183 case FP_NORMAL: {
184 // Increase bits in mantissa, making low-order bits 0.
185 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
186
187 // Change exponent bias.
188 exponent += (kFloatExponentBias - kFloat16ExponentBias);
189 break;
190 }
191
192 default:
193 UNREACHABLE();
194 return kFP32DefaultNaN;
195 }
196 return float_pack(sign, exponent, mantissa);
197 }
198
199 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
200 // Only the FPTieEven rounding mode is implemented.
201 DCHECK_EQ(round_mode, FPTieEven);
202 USE(round_mode);
203
204 int64_t sign = float_sign(value);
205 int64_t exponent =
206 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
207 uint32_t mantissa = float_mantissa(value);
208
209 switch (std::fpclassify(value)) {
210 case FP_NAN: {
211 if (IsSignallingNaN(value)) {
212 FPProcessException();
213 }
214 if (DN()) return kFP16DefaultNaN;
215
216 // Convert NaNs as the processor would:
217 // - The sign is propagated.
218 // - The mantissa is transferred as much as possible, except that the top
219 // bit is forced to '1', making the result a quiet NaN.
220 float16 result =
221 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
222 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
223 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
224 return result;
225 }
226
227 case FP_ZERO:
228 return (sign == 0) ? 0 : 0x8000;
229
230 case FP_INFINITE:
231 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
232
233 case FP_NORMAL:
234 case FP_SUBNORMAL: {
235 // Convert float-to-half as the processor would, assuming that FPCR.FZ
236 // (flush-to-zero) is not set.
237
238 // Add the implicit '1' bit to the mantissa.
239 mantissa += (1 << kFloatMantissaBits);
240 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
241 }
242 }
243
244 UNREACHABLE();
245 return kFP16DefaultNaN;
246 }
247
248 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
249 // Only the FPTieEven rounding mode is implemented.
250 DCHECK_EQ(round_mode, FPTieEven);
251 USE(round_mode);
252
253 int64_t sign = double_sign(value);
254 int64_t exponent =
255 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
256 uint64_t mantissa = double_mantissa(value);
257
258 switch (std::fpclassify(value)) {
259 case FP_NAN: {
260 if (IsSignallingNaN(value)) {
261 FPProcessException();
262 }
263 if (DN()) return kFP16DefaultNaN;
264
265 // Convert NaNs as the processor would:
266 // - The sign is propagated.
267 // - The mantissa is transferred as much as possible, except that the top
268 // bit is forced to '1', making the result a quiet NaN.
269 float16 result =
270 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
271 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
272 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
273 return result;
274 }
275
276 case FP_ZERO:
277 return (sign == 0) ? 0 : 0x8000;
278
279 case FP_INFINITE:
280 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
281
282 case FP_NORMAL:
283 case FP_SUBNORMAL: {
284 // Convert double-to-half as the processor would, assuming that FPCR.FZ
285 // (flush-to-zero) is not set.
286
287 // Add the implicit '1' bit to the mantissa.
288 mantissa += (UINT64_C(1) << kDoubleMantissaBits);
289 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
290 }
291 }
292
293 UNREACHABLE();
294 return kFP16DefaultNaN;
295 }
296
297 float Simulator::FPToFloat(double value, FPRounding round_mode) {
298 // Only the FPTieEven rounding mode is implemented.
299 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
300 USE(round_mode);
301
302 switch (std::fpclassify(value)) {
303 case FP_NAN: {
304 if (IsSignallingNaN(value)) {
305 FPProcessException();
306 }
307 if (DN()) return kFP32DefaultNaN;
308
309 // Convert NaNs as the processor would:
310 // - The sign is propagated.
311 // - The mantissa is transferred as much as possible, except that the
312 // top bit is forced to '1', making the result a quiet NaN.
313
314 uint64_t raw = bit_cast<uint64_t>(value);
315
316 uint32_t sign = raw >> 63;
317 uint32_t exponent = (1 << 8) - 1;
318 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
319 50, kDoubleMantissaBits - kFloatMantissaBits, raw));
320 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.
321
322 return float_pack(sign, exponent, mantissa);
323 }
324
325 case FP_ZERO:
326 case FP_INFINITE: {
327 // In a C++ cast, any value representable in the target type will be
328 // unchanged. This is always the case for +/-0.0 and infinities.
329 return static_cast<float>(value);
330 }
331
332 case FP_NORMAL:
333 case FP_SUBNORMAL: {
334 // Convert double-to-float as the processor would, assuming that FPCR.FZ
335 // (flush-to-zero) is not set.
336 uint32_t sign = double_sign(value);
337 int64_t exponent =
338 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
339 uint64_t mantissa = double_mantissa(value);
340 if (std::fpclassify(value) == FP_NORMAL) {
341 // For normal FP values, add the hidden bit.
342 mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
343 }
344 return FPRoundToFloat(sign, exponent, mantissa, round_mode);
345 }
346 }
347
348 UNREACHABLE();
349 return kFP32DefaultNaN;
350 }
351
352 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
353 dst.ClearForWrite(vform);
354 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
355 dst.ReadUintFromMem(vform, i, addr);
356 addr += LaneSizeInBytesFromFormat(vform);
357 }
358 }
359
360 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
361 uint64_t addr) {
362 dst.ReadUintFromMem(vform, index, addr);
363 }
364
365 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
366 dst.ClearForWrite(vform);
367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
368 dst.ReadUintFromMem(vform, i, addr);
369 }
370 }
371
372 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
373 LogicVRegister dst2, uint64_t addr1) {
374 dst1.ClearForWrite(vform);
375 dst2.ClearForWrite(vform);
376 int esize = LaneSizeInBytesFromFormat(vform);
377 uint64_t addr2 = addr1 + esize;
378 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
379 dst1.ReadUintFromMem(vform, i, addr1);
380 dst2.ReadUintFromMem(vform, i, addr2);
381 addr1 += 2 * esize;
382 addr2 += 2 * esize;
383 }
384 }
385
386 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
387 LogicVRegister dst2, int index, uint64_t addr1) {
388 dst1.ClearForWrite(vform);
389 dst2.ClearForWrite(vform);
390 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
391 dst1.ReadUintFromMem(vform, index, addr1);
392 dst2.ReadUintFromMem(vform, index, addr2);
393 }
394
395 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
396 LogicVRegister dst2, uint64_t addr) {
397 dst1.ClearForWrite(vform);
398 dst2.ClearForWrite(vform);
399 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
400 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
401 dst1.ReadUintFromMem(vform, i, addr);
402 dst2.ReadUintFromMem(vform, i, addr2);
403 }
404 }
405
406 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
407 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
408 dst1.ClearForWrite(vform);
409 dst2.ClearForWrite(vform);
410 dst3.ClearForWrite(vform);
411 int esize = LaneSizeInBytesFromFormat(vform);
412 uint64_t addr2 = addr1 + esize;
413 uint64_t addr3 = addr2 + esize;
414 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
415 dst1.ReadUintFromMem(vform, i, addr1);
416 dst2.ReadUintFromMem(vform, i, addr2);
417 dst3.ReadUintFromMem(vform, i, addr3);
418 addr1 += 3 * esize;
419 addr2 += 3 * esize;
420 addr3 += 3 * esize;
421 }
422 }
423
424 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
425 LogicVRegister dst2, LogicVRegister dst3, int index,
426 uint64_t addr1) {
427 dst1.ClearForWrite(vform);
428 dst2.ClearForWrite(vform);
429 dst3.ClearForWrite(vform);
430 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
431 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
432 dst1.ReadUintFromMem(vform, index, addr1);
433 dst2.ReadUintFromMem(vform, index, addr2);
434 dst3.ReadUintFromMem(vform, index, addr3);
435 }
436
437 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
438 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
439 dst1.ClearForWrite(vform);
440 dst2.ClearForWrite(vform);
441 dst3.ClearForWrite(vform);
442 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
443 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
444 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
445 dst1.ReadUintFromMem(vform, i, addr);
446 dst2.ReadUintFromMem(vform, i, addr2);
447 dst3.ReadUintFromMem(vform, i, addr3);
448 }
449 }
450
451 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
452 LogicVRegister dst2, LogicVRegister dst3,
453 LogicVRegister dst4, uint64_t addr1) {
454 dst1.ClearForWrite(vform);
455 dst2.ClearForWrite(vform);
456 dst3.ClearForWrite(vform);
457 dst4.ClearForWrite(vform);
458 int esize = LaneSizeInBytesFromFormat(vform);
459 uint64_t addr2 = addr1 + esize;
460 uint64_t addr3 = addr2 + esize;
461 uint64_t addr4 = addr3 + esize;
462 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
463 dst1.ReadUintFromMem(vform, i, addr1);
464 dst2.ReadUintFromMem(vform, i, addr2);
465 dst3.ReadUintFromMem(vform, i, addr3);
466 dst4.ReadUintFromMem(vform, i, addr4);
467 addr1 += 4 * esize;
468 addr2 += 4 * esize;
469 addr3 += 4 * esize;
470 addr4 += 4 * esize;
471 }
472 }
473
474 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
475 LogicVRegister dst2, LogicVRegister dst3,
476 LogicVRegister dst4, int index, uint64_t addr1) {
477 dst1.ClearForWrite(vform);
478 dst2.ClearForWrite(vform);
479 dst3.ClearForWrite(vform);
480 dst4.ClearForWrite(vform);
481 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
482 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
483 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
484 dst1.ReadUintFromMem(vform, index, addr1);
485 dst2.ReadUintFromMem(vform, index, addr2);
486 dst3.ReadUintFromMem(vform, index, addr3);
487 dst4.ReadUintFromMem(vform, index, addr4);
488 }
489
490 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
491 LogicVRegister dst2, LogicVRegister dst3,
492 LogicVRegister dst4, uint64_t addr) {
493 dst1.ClearForWrite(vform);
494 dst2.ClearForWrite(vform);
495 dst3.ClearForWrite(vform);
496 dst4.ClearForWrite(vform);
497 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
498 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
499 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
500 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
501 dst1.ReadUintFromMem(vform, i, addr);
502 dst2.ReadUintFromMem(vform, i, addr2);
503 dst3.ReadUintFromMem(vform, i, addr3);
504 dst4.ReadUintFromMem(vform, i, addr4);
505 }
506 }
507
508 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
509 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
510 src.WriteUintToMem(vform, i, addr);
511 addr += LaneSizeInBytesFromFormat(vform);
512 }
513 }
514
515 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
516 uint64_t addr) {
517 src.WriteUintToMem(vform, index, addr);
518 }
519
520 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
521 uint64_t addr) {
522 int esize = LaneSizeInBytesFromFormat(vform);
523 uint64_t addr2 = addr + esize;
524 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
525 dst.WriteUintToMem(vform, i, addr);
526 dst2.WriteUintToMem(vform, i, addr2);
527 addr += 2 * esize;
528 addr2 += 2 * esize;
529 }
530 }
531
532 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
533 int index, uint64_t addr) {
534 int esize = LaneSizeInBytesFromFormat(vform);
535 dst.WriteUintToMem(vform, index, addr);
536 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
537 }
538
539 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
540 LogicVRegister dst3, uint64_t addr) {
541 int esize = LaneSizeInBytesFromFormat(vform);
542 uint64_t addr2 = addr + esize;
543 uint64_t addr3 = addr2 + esize;
544 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
545 dst.WriteUintToMem(vform, i, addr);
546 dst2.WriteUintToMem(vform, i, addr2);
547 dst3.WriteUintToMem(vform, i, addr3);
548 addr += 3 * esize;
549 addr2 += 3 * esize;
550 addr3 += 3 * esize;
551 }
552 }
553
554 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
555 LogicVRegister dst3, int index, uint64_t addr) {
556 int esize = LaneSizeInBytesFromFormat(vform);
557 dst.WriteUintToMem(vform, index, addr);
558 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
559 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
560 }
561
562 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
563 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
564 int esize = LaneSizeInBytesFromFormat(vform);
565 uint64_t addr2 = addr + esize;
566 uint64_t addr3 = addr2 + esize;
567 uint64_t addr4 = addr3 + esize;
568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569 dst.WriteUintToMem(vform, i, addr);
570 dst2.WriteUintToMem(vform, i, addr2);
571 dst3.WriteUintToMem(vform, i, addr3);
572 dst4.WriteUintToMem(vform, i, addr4);
573 addr += 4 * esize;
574 addr2 += 4 * esize;
575 addr3 += 4 * esize;
576 addr4 += 4 * esize;
577 }
578 }
579
580 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
581 LogicVRegister dst3, LogicVRegister dst4, int index,
582 uint64_t addr) {
583 int esize = LaneSizeInBytesFromFormat(vform);
584 dst.WriteUintToMem(vform, index, addr);
585 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
586 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
587 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
588 }
589
590 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
591 const LogicVRegister& src1,
592 const LogicVRegister& src2, Condition cond) {
593 dst.ClearForWrite(vform);
594 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
595 int64_t sa = src1.Int(vform, i);
596 int64_t sb = src2.Int(vform, i);
597 uint64_t ua = src1.Uint(vform, i);
598 uint64_t ub = src2.Uint(vform, i);
599 bool result = false;
600 switch (cond) {
601 case eq:
602 result = (ua == ub);
603 break;
604 case ge:
605 result = (sa >= sb);
606 break;
607 case gt:
608 result = (sa > sb);
609 break;
610 case hi:
611 result = (ua > ub);
612 break;
613 case hs:
614 result = (ua >= ub);
615 break;
616 case lt:
617 result = (sa < sb);
618 break;
619 case le:
620 result = (sa <= sb);
621 break;
622 default:
623 UNREACHABLE();
624 break;
625 }
626 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
627 }
628 return dst;
629 }
630
631 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
632 const LogicVRegister& src1, int imm,
633 Condition cond) {
634 SimVRegister temp;
635 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
636 return cmp(vform, dst, src1, imm_reg, cond);
637 }
638
639 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
640 const LogicVRegister& src1,
641 const LogicVRegister& src2) {
642 dst.ClearForWrite(vform);
643 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
644 uint64_t ua = src1.Uint(vform, i);
645 uint64_t ub = src2.Uint(vform, i);
646 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
647 }
648 return dst;
649 }
650
651 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
652 const LogicVRegister& src1,
653 const LogicVRegister& src2) {
654 int lane_size = LaneSizeInBitsFromFormat(vform);
655 dst.ClearForWrite(vform);
656 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
657 // Test for unsigned saturation.
658 uint64_t ua = src1.UintLeftJustified(vform, i);
659 uint64_t ub = src2.UintLeftJustified(vform, i);
660 uint64_t ur = ua + ub;
661 if (ur < ua) {
662 dst.SetUnsignedSat(i, true);
663 }
664
665 // Test for signed saturation.
666 bool pos_a = (ua >> 63) == 0;
667 bool pos_b = (ub >> 63) == 0;
668 bool pos_r = (ur >> 63) == 0;
669 // If the signs of the operands are the same, but different from the result,
670 // there was an overflow.
671 if ((pos_a == pos_b) && (pos_a != pos_r)) {
672 dst.SetSignedSat(i, pos_a);
673 }
674
675 dst.SetInt(vform, i, ur >> (64 - lane_size));
676 }
677 return dst;
678 }
679
680 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
681 const LogicVRegister& src1,
682 const LogicVRegister& src2) {
683 SimVRegister temp1, temp2;
684 uzp1(vform, temp1, src1, src2);
685 uzp2(vform, temp2, src1, src2);
686 add(vform, dst, temp1, temp2);
687 return dst;
688 }
689
690 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
691 const LogicVRegister& src1,
692 const LogicVRegister& src2) {
693 SimVRegister temp;
694 mul(vform, temp, src1, src2);
695 add(vform, dst, dst, temp);
696 return dst;
697 }
698
699 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
700 const LogicVRegister& src1,
701 const LogicVRegister& src2) {
702 SimVRegister temp;
703 mul(vform, temp, src1, src2);
704 sub(vform, dst, dst, temp);
705 return dst;
706 }
707
708 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
709 const LogicVRegister& src1,
710 const LogicVRegister& src2) {
711 dst.ClearForWrite(vform);
712 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
713 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
714 }
715 return dst;
716 }
717
718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
719 const LogicVRegister& src1,
720 const LogicVRegister& src2, int index) {
721 SimVRegister temp;
722 VectorFormat indexform = VectorFormatFillQ(vform);
723 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
724 }
725
726 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
727 const LogicVRegister& src1,
728 const LogicVRegister& src2, int index) {
729 SimVRegister temp;
730 VectorFormat indexform = VectorFormatFillQ(vform);
731 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
732 }
733
734 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
735 const LogicVRegister& src1,
736 const LogicVRegister& src2, int index) {
737 SimVRegister temp;
738 VectorFormat indexform = VectorFormatFillQ(vform);
739 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
740 }
741
742 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
743 const LogicVRegister& src1,
744 const LogicVRegister& src2, int index) {
745 SimVRegister temp;
746 VectorFormat indexform =
747 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
748 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
749 }
750
751 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
752 const LogicVRegister& src1,
753 const LogicVRegister& src2, int index) {
754 SimVRegister temp;
755 VectorFormat indexform =
756 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
757 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
758 }
759
760 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
761 const LogicVRegister& src1,
762 const LogicVRegister& src2, int index) {
763 SimVRegister temp;
764 VectorFormat indexform =
765 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
766 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
767 }
768
769 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
770 const LogicVRegister& src1,
771 const LogicVRegister& src2, int index) {
772 SimVRegister temp;
773 VectorFormat indexform =
774 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
775 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
776 }
777
778 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
779 const LogicVRegister& src1,
780 const LogicVRegister& src2, int index) {
781 SimVRegister temp;
782 VectorFormat indexform =
783 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
784 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
785 }
786
787 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
788 const LogicVRegister& src1,
789 const LogicVRegister& src2, int index) {
790 SimVRegister temp;
791 VectorFormat indexform =
792 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
793 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
794 }
795
796 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
797 const LogicVRegister& src1,
798 const LogicVRegister& src2, int index) {
799 SimVRegister temp;
800 VectorFormat indexform =
801 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
802 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
803 }
804
805 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
806 const LogicVRegister& src1,
807 const LogicVRegister& src2, int index) {
808 SimVRegister temp;
809 VectorFormat indexform =
810 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
811 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
812 }
813
814 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
815 const LogicVRegister& src1,
816 const LogicVRegister& src2, int index) {
817 SimVRegister temp;
818 VectorFormat indexform =
819 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
820 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
821 }
822
823 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
824 const LogicVRegister& src1,
825 const LogicVRegister& src2, int index) {
826 SimVRegister temp;
827 VectorFormat indexform =
828 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
829 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
830 }
831
832 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
833 const LogicVRegister& src1,
834 const LogicVRegister& src2, int index) {
835 SimVRegister temp;
836 VectorFormat indexform =
837 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
838 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
839 }
840
841 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
842 const LogicVRegister& src1,
843 const LogicVRegister& src2, int index) {
844 SimVRegister temp;
845 VectorFormat indexform =
846 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
847 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
848 }
849
850 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
851 const LogicVRegister& src1,
852 const LogicVRegister& src2, int index) {
853 SimVRegister temp;
854 VectorFormat indexform =
855 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
856 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
857 }
858
859 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
860 const LogicVRegister& src1,
861 const LogicVRegister& src2, int index) {
862 SimVRegister temp;
863 VectorFormat indexform =
864 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
865 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
866 }
867
868 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
869 const LogicVRegister& src1,
870 const LogicVRegister& src2, int index) {
871 SimVRegister temp;
872 VectorFormat indexform =
873 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
874 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
875 }
876
877 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
878 const LogicVRegister& src1,
879 const LogicVRegister& src2, int index) {
880 SimVRegister temp;
881 VectorFormat indexform =
882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
883 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
884 }
885
886 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
887 const LogicVRegister& src1,
888 const LogicVRegister& src2, int index) {
889 SimVRegister temp;
890 VectorFormat indexform =
891 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
892 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
893 }
894
895 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
896 const LogicVRegister& src1,
897 const LogicVRegister& src2, int index) {
898 SimVRegister temp;
899 VectorFormat indexform =
900 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
901 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
902 }
903
904 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
905 const LogicVRegister& src1,
906 const LogicVRegister& src2, int index) {
907 SimVRegister temp;
908 VectorFormat indexform = VectorFormatFillQ(vform);
909 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
910 }
911
912 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
913 const LogicVRegister& src1,
914 const LogicVRegister& src2, int index) {
915 SimVRegister temp;
916 VectorFormat indexform = VectorFormatFillQ(vform);
917 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
918 }
919
920 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
921 uint16_t result = 0;
922 uint16_t extended_op2 = op2;
923 for (int i = 0; i < 8; ++i) {
924 if ((op1 >> i) & 1) {
925 result = result ^ (extended_op2 << i);
926 }
927 }
928 return result;
929 }
930
931 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
932 const LogicVRegister& src1,
933 const LogicVRegister& src2) {
934 dst.ClearForWrite(vform);
935 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
936 dst.SetUint(vform, i,
937 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
938 }
939 return dst;
940 }
941
942 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
943 const LogicVRegister& src1,
944 const LogicVRegister& src2) {
945 VectorFormat vform_src = VectorFormatHalfWidth(vform);
946 dst.ClearForWrite(vform);
947 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
948 dst.SetUint(
949 vform, i,
950 PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
951 }
952 return dst;
953 }
954
955 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
956 const LogicVRegister& src1,
957 const LogicVRegister& src2) {
958 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
959 dst.ClearForWrite(vform);
960 int lane_count = LaneCountFromFormat(vform);
961 for (int i = 0; i < lane_count; i++) {
962 dst.SetUint(vform, i,
963 PolynomialMult(src1.Uint(vform_src, lane_count + i),
964 src2.Uint(vform_src, lane_count + i)));
965 }
966 return dst;
967 }
968
969 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
970 const LogicVRegister& src1,
971 const LogicVRegister& src2) {
972 int lane_size = LaneSizeInBitsFromFormat(vform);
973 dst.ClearForWrite(vform);
974 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
975 // Test for unsigned saturation.
976 uint64_t ua = src1.UintLeftJustified(vform, i);
977 uint64_t ub = src2.UintLeftJustified(vform, i);
978 uint64_t ur = ua - ub;
979 if (ub > ua) {
980 dst.SetUnsignedSat(i, false);
981 }
982
983 // Test for signed saturation.
984 bool pos_a = (ua >> 63) == 0;
985 bool pos_b = (ub >> 63) == 0;
986 bool pos_r = (ur >> 63) == 0;
987 // If the signs of the operands are different, and the sign of the first
988 // operand doesn't match the result, there was an overflow.
989 if ((pos_a != pos_b) && (pos_a != pos_r)) {
990 dst.SetSignedSat(i, pos_a);
991 }
992
993 dst.SetInt(vform, i, ur >> (64 - lane_size));
994 }
995 return dst;
996 }
997
998 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
999 const LogicVRegister& src1,
1000 const LogicVRegister& src2) {
1001 dst.ClearForWrite(vform);
1002 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1003 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1004 }
1005 return dst;
1006 }
1007
1008 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1009 const LogicVRegister& src1,
1010 const LogicVRegister& src2) {
1011 dst.ClearForWrite(vform);
1012 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1013 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1014 }
1015 return dst;
1016 }
1017
1018 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1019 const LogicVRegister& src1,
1020 const LogicVRegister& src2) {
1021 dst.ClearForWrite(vform);
1022 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1023 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1024 }
1025 return dst;
1026 }
1027
1028 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1029 const LogicVRegister& src1,
1030 const LogicVRegister& src2) {
1031 dst.ClearForWrite(vform);
1032 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1033 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1034 }
1035 return dst;
1036 }
1037
1038 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1039 const LogicVRegister& src1,
1040 const LogicVRegister& src2) {
1041 dst.ClearForWrite(vform);
1042 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1043 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1044 }
1045 return dst;
1046 }
1047
1048 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1049 const LogicVRegister& src, uint64_t imm) {
1050 uint64_t result[16];
1051 int laneCount = LaneCountFromFormat(vform);
1052 for (int i = 0; i < laneCount; ++i) {
1053 result[i] = src.Uint(vform, i) & ~imm;
1054 }
1055 dst.SetUintArray(vform, result);
1056 return dst;
1057 }
1058
1059 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1060 const LogicVRegister& src1,
1061 const LogicVRegister& src2) {
1062 dst.ClearForWrite(vform);
1063 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1064 uint64_t operand1 = dst.Uint(vform, i);
1065 uint64_t operand2 = ~src2.Uint(vform, i);
1066 uint64_t operand3 = src1.Uint(vform, i);
1067 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1068 dst.SetUint(vform, i, result);
1069 }
1070 return dst;
1071 }
1072
1073 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1074 const LogicVRegister& src1,
1075 const LogicVRegister& src2) {
1076 dst.ClearForWrite(vform);
1077 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1078 uint64_t operand1 = dst.Uint(vform, i);
1079 uint64_t operand2 = src2.Uint(vform, i);
1080 uint64_t operand3 = src1.Uint(vform, i);
1081 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1082 dst.SetUint(vform, i, result);
1083 }
1084 return dst;
1085 }
1086
1087 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1088 const LogicVRegister& src1,
1089 const LogicVRegister& src2) {
1090 dst.ClearForWrite(vform);
1091 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1092 uint64_t operand1 = src2.Uint(vform, i);
1093 uint64_t operand2 = dst.Uint(vform, i);
1094 uint64_t operand3 = src1.Uint(vform, i);
1095 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1096 dst.SetUint(vform, i, result);
1097 }
1098 return dst;
1099 }
1100
1101 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1102 const LogicVRegister& src1,
1103 const LogicVRegister& src2, bool max) {
1104 dst.ClearForWrite(vform);
1105 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1106 int64_t src1_val = src1.Int(vform, i);
1107 int64_t src2_val = src2.Int(vform, i);
1108 int64_t dst_val;
1109 if (max) {
1110 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1111 } else {
1112 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1113 }
1114 dst.SetInt(vform, i, dst_val);
1115 }
1116 return dst;
1117 }
1118
1119 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1120 const LogicVRegister& src1,
1121 const LogicVRegister& src2) {
1122 return SMinMax(vform, dst, src1, src2, true);
1123 }
1124
1125 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1126 const LogicVRegister& src1,
1127 const LogicVRegister& src2) {
1128 return SMinMax(vform, dst, src1, src2, false);
1129 }
1130
1131 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1132 const LogicVRegister& src1,
1133 const LogicVRegister& src2, bool max) {
1134 int lanes = LaneCountFromFormat(vform);
1135 int64_t result[kMaxLanesPerVector];
1136 const LogicVRegister* src = &src1;
1137 for (int j = 0; j < 2; j++) {
1138 for (int i = 0; i < lanes; i += 2) {
1139 int64_t first_val = src->Int(vform, i);
1140 int64_t second_val = src->Int(vform, i + 1);
1141 int64_t dst_val;
1142 if (max) {
1143 dst_val = (first_val > second_val) ? first_val : second_val;
1144 } else {
1145 dst_val = (first_val < second_val) ? first_val : second_val;
1146 }
1147 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1148 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1149 }
1150 src = &src2;
1151 }
1152 dst.SetIntArray(vform, result);
1153 return dst;
1154 }
1155
1156 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1157 const LogicVRegister& src1,
1158 const LogicVRegister& src2) {
1159 return SMinMaxP(vform, dst, src1, src2, true);
1160 }
1161
1162 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1163 const LogicVRegister& src1,
1164 const LogicVRegister& src2) {
1165 return SMinMaxP(vform, dst, src1, src2, false);
1166 }
1167
1168 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1169 const LogicVRegister& src) {
1170 DCHECK_EQ(vform, kFormatD);
1171
1172 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1173 dst.ClearForWrite(vform);
1174 dst.SetUint(vform, 0, dst_val);
1175 return dst;
1176 }
1177
1178 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1179 const LogicVRegister& src) {
1180 VectorFormat vform_dst =
1181 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1182
1183 int64_t dst_val = 0;
1184 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1185 dst_val += src.Int(vform, i);
1186 }
1187
1188 dst.ClearForWrite(vform_dst);
1189 dst.SetInt(vform_dst, 0, dst_val);
1190 return dst;
1191 }
1192
1193 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1194 const LogicVRegister& src) {
1195 VectorFormat vform_dst =
1196 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1197
1198 int64_t dst_val = 0;
1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1200 dst_val += src.Int(vform, i);
1201 }
1202
1203 dst.ClearForWrite(vform_dst);
1204 dst.SetInt(vform_dst, 0, dst_val);
1205 return dst;
1206 }
1207
1208 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1209 const LogicVRegister& src) {
1210 VectorFormat vform_dst =
1211 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1212
1213 uint64_t dst_val = 0;
1214 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1215 dst_val += src.Uint(vform, i);
1216 }
1217
1218 dst.ClearForWrite(vform_dst);
1219 dst.SetUint(vform_dst, 0, dst_val);
1220 return dst;
1221 }
1222
1223 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1224 const LogicVRegister& src, bool max) {
1225 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1226 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1227 int64_t src_val = src.Int(vform, i);
1228 if (max) {
1229 dst_val = (src_val > dst_val) ? src_val : dst_val;
1230 } else {
1231 dst_val = (src_val < dst_val) ? src_val : dst_val;
1232 }
1233 }
1234 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1235 dst.SetInt(vform, 0, dst_val);
1236 return dst;
1237 }
1238
1239 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1240 const LogicVRegister& src) {
1241 SMinMaxV(vform, dst, src, true);
1242 return dst;
1243 }
1244
1245 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1246 const LogicVRegister& src) {
1247 SMinMaxV(vform, dst, src, false);
1248 return dst;
1249 }
1250
1251 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1252 const LogicVRegister& src1,
1253 const LogicVRegister& src2, bool max) {
1254 dst.ClearForWrite(vform);
1255 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1256 uint64_t src1_val = src1.Uint(vform, i);
1257 uint64_t src2_val = src2.Uint(vform, i);
1258 uint64_t dst_val;
1259 if (max) {
1260 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1261 } else {
1262 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1263 }
1264 dst.SetUint(vform, i, dst_val);
1265 }
1266 return dst;
1267 }
1268
1269 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1270 const LogicVRegister& src1,
1271 const LogicVRegister& src2) {
1272 return UMinMax(vform, dst, src1, src2, true);
1273 }
1274
1275 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1276 const LogicVRegister& src1,
1277 const LogicVRegister& src2) {
1278 return UMinMax(vform, dst, src1, src2, false);
1279 }
1280
1281 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1282 const LogicVRegister& src1,
1283 const LogicVRegister& src2, bool max) {
1284 int lanes = LaneCountFromFormat(vform);
1285 uint64_t result[kMaxLanesPerVector];
1286 const LogicVRegister* src = &src1;
1287 for (int j = 0; j < 2; j++) {
1288 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1289 uint64_t first_val = src->Uint(vform, i);
1290 uint64_t second_val = src->Uint(vform, i + 1);
1291 uint64_t dst_val;
1292 if (max) {
1293 dst_val = (first_val > second_val) ? first_val : second_val;
1294 } else {
1295 dst_val = (first_val < second_val) ? first_val : second_val;
1296 }
1297 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1298 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1299 }
1300 src = &src2;
1301 }
1302 dst.SetUintArray(vform, result);
1303 return dst;
1304 }
1305
1306 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1307 const LogicVRegister& src1,
1308 const LogicVRegister& src2) {
1309 return UMinMaxP(vform, dst, src1, src2, true);
1310 }
1311
1312 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1313 const LogicVRegister& src1,
1314 const LogicVRegister& src2) {
1315 return UMinMaxP(vform, dst, src1, src2, false);
1316 }
1317
1318 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1319 const LogicVRegister& src, bool max) {
1320 uint64_t dst_val = max ? 0 : UINT64_MAX;
1321 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1322 uint64_t src_val = src.Uint(vform, i);
1323 if (max) {
1324 dst_val = (src_val > dst_val) ? src_val : dst_val;
1325 } else {
1326 dst_val = (src_val < dst_val) ? src_val : dst_val;
1327 }
1328 }
1329 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1330 dst.SetUint(vform, 0, dst_val);
1331 return dst;
1332 }
1333
1334 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1335 const LogicVRegister& src) {
1336 UMinMaxV(vform, dst, src, true);
1337 return dst;
1338 }
1339
1340 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1341 const LogicVRegister& src) {
1342 UMinMaxV(vform, dst, src, false);
1343 return dst;
1344 }
1345
1346 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1347 const LogicVRegister& src, int shift) {
1348 DCHECK_GE(shift, 0);
1349 SimVRegister temp;
1350 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1351 return ushl(vform, dst, src, shiftreg);
1352 }
1353
1354 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1355 const LogicVRegister& src, int shift) {
1356 DCHECK_GE(shift, 0);
1357 SimVRegister temp1, temp2;
1358 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1359 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1360 return sshl(vform, dst, extendedreg, shiftreg);
1361 }
1362
1363 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1364 const LogicVRegister& src, int shift) {
1365 DCHECK_GE(shift, 0);
1366 SimVRegister temp1, temp2;
1367 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1368 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1369 return sshl(vform, dst, extendedreg, shiftreg);
1370 }
1371
1372 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1373 const LogicVRegister& src) {
1374 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1375 return sshll(vform, dst, src, shift);
1376 }
1377
1378 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1379 const LogicVRegister& src) {
1380 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1381 return sshll2(vform, dst, src, shift);
1382 }
1383
1384 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1385 const LogicVRegister& src, int shift) {
1386 DCHECK_GE(shift, 0);
1387 SimVRegister temp1, temp2;
1388 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1389 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1390 return ushl(vform, dst, extendedreg, shiftreg);
1391 }
1392
1393 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1394 const LogicVRegister& src, int shift) {
1395 DCHECK_GE(shift, 0);
1396 SimVRegister temp1, temp2;
1397 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1398 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1399 return ushl(vform, dst, extendedreg, shiftreg);
1400 }
1401
1402 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1403 const LogicVRegister& src, int shift) {
1404 dst.ClearForWrite(vform);
1405 int laneCount = LaneCountFromFormat(vform);
1406 for (int i = 0; i < laneCount; i++) {
1407 uint64_t src_lane = src.Uint(vform, i);
1408 uint64_t dst_lane = dst.Uint(vform, i);
1409 uint64_t shifted = src_lane << shift;
1410 uint64_t mask = MaxUintFromFormat(vform) << shift;
1411 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1412 }
1413 return dst;
1414 }
1415
1416 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1417 const LogicVRegister& src, int shift) {
1418 DCHECK_GE(shift, 0);
1419 SimVRegister temp;
1420 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1421 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1422 }
1423
1424 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1425 const LogicVRegister& src, int shift) {
1426 DCHECK_GE(shift, 0);
1427 SimVRegister temp;
1428 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1429 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1430 }
1431
1432 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1433 const LogicVRegister& src, int shift) {
1434 DCHECK_GE(shift, 0);
1435 SimVRegister temp;
1436 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1437 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1438 }
1439
1440 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1441 const LogicVRegister& src, int shift) {
1442 dst.ClearForWrite(vform);
1443 int laneCount = LaneCountFromFormat(vform);
1444 DCHECK((shift > 0) &&
1445 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1446 for (int i = 0; i < laneCount; i++) {
1447 uint64_t src_lane = src.Uint(vform, i);
1448 uint64_t dst_lane = dst.Uint(vform, i);
1449 uint64_t shifted;
1450 uint64_t mask;
1451 if (shift == 64) {
1452 shifted = 0;
1453 mask = 0;
1454 } else {
1455 shifted = src_lane >> shift;
1456 mask = MaxUintFromFormat(vform) >> shift;
1457 }
1458 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1459 }
1460 return dst;
1461 }
1462
1463 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1464 const LogicVRegister& src, int shift) {
1465 DCHECK_GE(shift, 0);
1466 SimVRegister temp;
1467 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1468 return ushl(vform, dst, src, shiftreg);
1469 }
1470
1471 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1472 const LogicVRegister& src, int shift) {
1473 DCHECK_GE(shift, 0);
1474 SimVRegister temp;
1475 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1476 return sshl(vform, dst, src, shiftreg);
1477 }
1478
1479 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1480 const LogicVRegister& src, int shift) {
1481 SimVRegister temp;
1482 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1483 return add(vform, dst, dst, shifted_reg);
1484 }
1485
1486 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1487 const LogicVRegister& src, int shift) {
1488 SimVRegister temp;
1489 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1490 return add(vform, dst, dst, shifted_reg);
1491 }
1492
1493 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1494 const LogicVRegister& src, int shift) {
1495 SimVRegister temp;
1496 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1497 return add(vform, dst, dst, shifted_reg);
1498 }
1499
1500 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1501 const LogicVRegister& src, int shift) {
1502 SimVRegister temp;
1503 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1504 return add(vform, dst, dst, shifted_reg);
1505 }
1506
1507 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1508 const LogicVRegister& src) {
1509 uint64_t result[16];
1510 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1511 int laneCount = LaneCountFromFormat(vform);
1512 for (int i = 0; i < laneCount; i++) {
1513 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1514 }
1515
1516 dst.SetUintArray(vform, result);
1517 return dst;
1518 }
1519
1520 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1521 const LogicVRegister& src) {
1522 uint64_t result[16];
1523 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1524 int laneCount = LaneCountFromFormat(vform);
1525 for (int i = 0; i < laneCount; i++) {
1526 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1527 }
1528
1529 dst.SetUintArray(vform, result);
1530 return dst;
1531 }
1532
1533 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1534 const LogicVRegister& src) {
1535 uint64_t result[16];
1536 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1537 int laneCount = LaneCountFromFormat(vform);
1538 for (int i = 0; i < laneCount; i++) {
1539 uint64_t value = src.Uint(vform, i);
1540 result[i] = 0;
1541 for (int j = 0; j < laneSizeInBits; j++) {
1542 result[i] += (value & 1);
1543 value >>= 1;
1544 }
1545 }
1546
1547 dst.SetUintArray(vform, result);
1548 return dst;
1549 }
1550
1551 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1552 const LogicVRegister& src1,
1553 const LogicVRegister& src2) {
1554 dst.ClearForWrite(vform);
1555 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1556 int8_t shift_val = src2.Int(vform, i);
1557 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1558
1559 // Set signed saturation state.
1560 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1561 (lj_src_val != 0)) {
1562 dst.SetSignedSat(i, lj_src_val >= 0);
1563 }
1564
1565 // Set unsigned saturation state.
1566 if (lj_src_val < 0) {
1567 dst.SetUnsignedSat(i, false);
1568 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1569 (lj_src_val != 0)) {
1570 dst.SetUnsignedSat(i, true);
1571 }
1572
1573 int64_t src_val = src1.Int(vform, i);
1574 bool src_is_negative = src_val < 0;
1575 if (shift_val > 63) {
1576 dst.SetInt(vform, i, 0);
1577 } else if (shift_val < -63) {
1578 dst.SetRounding(i, src_is_negative);
1579 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1580 } else {
1581 // Use unsigned types for shifts, as behaviour is undefined for signed
1582 // lhs.
1583 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1584
1585 if (shift_val < 0) {
1586 // Convert to right shift.
1587 shift_val = -shift_val;
1588
1589 // Set rounding state by testing most-significant bit shifted out.
1590 // Rounding only needed on right shifts.
1591 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1592 dst.SetRounding(i, true);
1593 }
1594
1595 usrc_val >>= shift_val;
1596
1597 if (src_is_negative) {
1598 // Simulate sign-extension.
1599 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1600 }
1601 } else {
1602 usrc_val <<= shift_val;
1603 }
1604 dst.SetUint(vform, i, usrc_val);
1605 }
1606 }
1607 return dst;
1608 }
1609
1610 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1611 const LogicVRegister& src1,
1612 const LogicVRegister& src2) {
1613 dst.ClearForWrite(vform);
1614 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1615 int8_t shift_val = src2.Int(vform, i);
1616 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1617
1618 // Set saturation state.
1619 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1620 dst.SetUnsignedSat(i, true);
1621 }
1622
1623 uint64_t src_val = src1.Uint(vform, i);
1624 if ((shift_val > 63) || (shift_val < -64)) {
1625 dst.SetUint(vform, i, 0);
1626 } else {
1627 if (shift_val < 0) {
1628 // Set rounding state. Rounding only needed on right shifts.
1629 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1630 dst.SetRounding(i, true);
1631 }
1632
1633 if (shift_val == -64) {
1634 src_val = 0;
1635 } else {
1636 src_val >>= -shift_val;
1637 }
1638 } else {
1639 src_val <<= shift_val;
1640 }
1641 dst.SetUint(vform, i, src_val);
1642 }
1643 }
1644 return dst;
1645 }
1646
1647 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1648 const LogicVRegister& src) {
1649 dst.ClearForWrite(vform);
1650 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1651 // Test for signed saturation.
1652 int64_t sa = src.Int(vform, i);
1653 if (sa == MinIntFromFormat(vform)) {
1654 dst.SetSignedSat(i, true);
1655 }
1656 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1657 }
1658 return dst;
1659 }
1660
1661 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1662 const LogicVRegister& src) {
1663 dst.ClearForWrite(vform);
1664 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1665 int64_t sa = dst.IntLeftJustified(vform, i);
1666 uint64_t ub = src.UintLeftJustified(vform, i);
1667 uint64_t ur = sa + ub;
1668
1669 int64_t sr = bit_cast<int64_t>(ur);
1670 if (sr < sa) { // Test for signed positive saturation.
1671 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1672 } else {
1673 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1674 }
1675 }
1676 return dst;
1677 }
1678
1679 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1680 const LogicVRegister& src) {
1681 dst.ClearForWrite(vform);
1682 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1683 uint64_t ua = dst.UintLeftJustified(vform, i);
1684 int64_t sb = src.IntLeftJustified(vform, i);
1685 uint64_t ur = ua + sb;
1686
1687 if ((sb > 0) && (ur <= ua)) {
1688 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1689 } else if ((sb < 0) && (ur >= ua)) {
1690 dst.SetUint(vform, i, 0); // Negative saturation.
1691 } else {
1692 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1693 }
1694 }
1695 return dst;
1696 }
1697
1698 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1699 const LogicVRegister& src) {
1700 dst.ClearForWrite(vform);
1701 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1702 // Test for signed saturation.
1703 int64_t sa = src.Int(vform, i);
1704 if (sa == MinIntFromFormat(vform)) {
1705 dst.SetSignedSat(i, true);
1706 }
1707 if (sa < 0) {
1708 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1709 } else {
1710 dst.SetInt(vform, i, sa);
1711 }
1712 }
1713 return dst;
1714 }
1715
1716 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1717 LogicVRegister dst, bool dstIsSigned,
1718 const LogicVRegister& src,
1719 bool srcIsSigned) {
1720 bool upperhalf = false;
1721 VectorFormat srcform = kFormatUndefined;
1722 int64_t ssrc[8];
1723 uint64_t usrc[8];
1724
1725 switch (dstform) {
1726 case kFormat8B:
1727 upperhalf = false;
1728 srcform = kFormat8H;
1729 break;
1730 case kFormat16B:
1731 upperhalf = true;
1732 srcform = kFormat8H;
1733 break;
1734 case kFormat4H:
1735 upperhalf = false;
1736 srcform = kFormat4S;
1737 break;
1738 case kFormat8H:
1739 upperhalf = true;
1740 srcform = kFormat4S;
1741 break;
1742 case kFormat2S:
1743 upperhalf = false;
1744 srcform = kFormat2D;
1745 break;
1746 case kFormat4S:
1747 upperhalf = true;
1748 srcform = kFormat2D;
1749 break;
1750 case kFormatB:
1751 upperhalf = false;
1752 srcform = kFormatH;
1753 break;
1754 case kFormatH:
1755 upperhalf = false;
1756 srcform = kFormatS;
1757 break;
1758 case kFormatS:
1759 upperhalf = false;
1760 srcform = kFormatD;
1761 break;
1762 default:
1763 UNIMPLEMENTED();
1764 }
1765
1766 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1767 ssrc[i] = src.Int(srcform, i);
1768 usrc[i] = src.Uint(srcform, i);
1769 }
1770
1771 int offset;
1772 if (upperhalf) {
1773 offset = LaneCountFromFormat(dstform) / 2;
1774 } else {
1775 offset = 0;
1776 dst.ClearForWrite(dstform);
1777 }
1778
1779 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1780 // Test for signed saturation
1781 if (ssrc[i] > MaxIntFromFormat(dstform)) {
1782 dst.SetSignedSat(offset + i, true);
1783 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1784 dst.SetSignedSat(offset + i, false);
1785 }
1786
1787 // Test for unsigned saturation
1788 if (srcIsSigned) {
1789 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1790 dst.SetUnsignedSat(offset + i, true);
1791 } else if (ssrc[i] < 0) {
1792 dst.SetUnsignedSat(offset + i, false);
1793 }
1794 } else {
1795 if (usrc[i] > MaxUintFromFormat(dstform)) {
1796 dst.SetUnsignedSat(offset + i, true);
1797 }
1798 }
1799
1800 int64_t result;
1801 if (srcIsSigned) {
1802 result = ssrc[i] & MaxUintFromFormat(dstform);
1803 } else {
1804 result = usrc[i] & MaxUintFromFormat(dstform);
1805 }
1806
1807 if (dstIsSigned) {
1808 dst.SetInt(dstform, offset + i, result);
1809 } else {
1810 dst.SetUint(dstform, offset + i, result);
1811 }
1812 }
1813 return dst;
1814 }
1815
1816 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1817 const LogicVRegister& src) {
1818 return ExtractNarrow(vform, dst, true, src, true);
1819 }
1820
1821 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1822 const LogicVRegister& src) {
1823 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1824 }
1825
1826 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1827 const LogicVRegister& src) {
1828 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1829 }
1830
1831 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1832 const LogicVRegister& src) {
1833 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1834 }
1835
1836 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1837 const LogicVRegister& src1,
1838 const LogicVRegister& src2, bool issigned) {
1839 dst.ClearForWrite(vform);
1840 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1841 if (issigned) {
1842 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1843 sr = sr > 0 ? sr : -sr;
1844 dst.SetInt(vform, i, sr);
1845 } else {
1846 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1847 sr = sr > 0 ? sr : -sr;
1848 dst.SetUint(vform, i, sr);
1849 }
1850 }
1851 return dst;
1852 }
1853
1854 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1855 const LogicVRegister& src1,
1856 const LogicVRegister& src2) {
1857 SimVRegister temp;
1858 dst.ClearForWrite(vform);
1859 AbsDiff(vform, temp, src1, src2, true);
1860 add(vform, dst, dst, temp);
1861 return dst;
1862 }
1863
1864 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1865 const LogicVRegister& src1,
1866 const LogicVRegister& src2) {
1867 SimVRegister temp;
1868 dst.ClearForWrite(vform);
1869 AbsDiff(vform, temp, src1, src2, false);
1870 add(vform, dst, dst, temp);
1871 return dst;
1872 }
1873
1874 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1875 const LogicVRegister& src) {
1876 dst.ClearForWrite(vform);
1877 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1878 dst.SetUint(vform, i, ~src.Uint(vform, i));
1879 }
1880 return dst;
1881 }
1882
1883 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1884 const LogicVRegister& src) {
1885 uint64_t result[16];
1886 int laneCount = LaneCountFromFormat(vform);
1887 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1888 uint64_t reversed_value;
1889 uint64_t value;
1890 for (int i = 0; i < laneCount; i++) {
1891 value = src.Uint(vform, i);
1892 reversed_value = 0;
1893 for (int j = 0; j < laneSizeInBits; j++) {
1894 reversed_value = (reversed_value << 1) | (value & 1);
1895 value >>= 1;
1896 }
1897 result[i] = reversed_value;
1898 }
1899
1900 dst.SetUintArray(vform, result);
1901 return dst;
1902 }
1903
1904 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1905 const LogicVRegister& src, int revSize) {
1906 uint64_t result[16];
1907 int laneCount = LaneCountFromFormat(vform);
1908 int laneSize = LaneSizeInBytesFromFormat(vform);
1909 int lanesPerLoop = revSize / laneSize;
1910 for (int i = 0; i < laneCount; i += lanesPerLoop) {
1911 for (int j = 0; j < lanesPerLoop; j++) {
1912 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1913 }
1914 }
1915 dst.SetUintArray(vform, result);
1916 return dst;
1917 }
1918
1919 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1920 const LogicVRegister& src) {
1921 return rev(vform, dst, src, 2);
1922 }
1923
1924 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1925 const LogicVRegister& src) {
1926 return rev(vform, dst, src, 4);
1927 }
1928
1929 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1930 const LogicVRegister& src) {
1931 return rev(vform, dst, src, 8);
1932 }
1933
1934 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1935 const LogicVRegister& src, bool is_signed,
1936 bool do_accumulate) {
1937 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1938 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1939 DCHECK_LE(LaneCountFromFormat(vform), 8);
1940
1941 uint64_t result[8];
1942 int lane_count = LaneCountFromFormat(vform);
1943 for (int i = 0; i < lane_count; i++) {
1944 if (is_signed) {
1945 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1946 src.Int(vformsrc, 2 * i + 1));
1947 } else {
1948 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1949 }
1950 }
1951
1952 dst.ClearForWrite(vform);
1953 for (int i = 0; i < lane_count; ++i) {
1954 if (do_accumulate) {
1955 result[i] += dst.Uint(vform, i);
1956 }
1957 dst.SetUint(vform, i, result[i]);
1958 }
1959
1960 return dst;
1961 }
1962
1963 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1964 const LogicVRegister& src) {
1965 return addlp(vform, dst, src, true, false);
1966 }
1967
1968 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1969 const LogicVRegister& src) {
1970 return addlp(vform, dst, src, false, false);
1971 }
1972
1973 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1974 const LogicVRegister& src) {
1975 return addlp(vform, dst, src, true, true);
1976 }
1977
1978 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1979 const LogicVRegister& src) {
1980 return addlp(vform, dst, src, false, true);
1981 }
1982
1983 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1984 const LogicVRegister& src1,
1985 const LogicVRegister& src2, int index) {
1986 uint8_t result[16];
1987 int laneCount = LaneCountFromFormat(vform);
1988 for (int i = 0; i < laneCount - index; ++i) {
1989 result[i] = src1.Uint(vform, i + index);
1990 }
1991 for (int i = 0; i < index; ++i) {
1992 result[laneCount - index + i] = src2.Uint(vform, i);
1993 }
1994 dst.ClearForWrite(vform);
1995 for (int i = 0; i < laneCount; ++i) {
1996 dst.SetUint(vform, i, result[i]);
1997 }
1998 return dst;
1999 }
2000
2001 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
2002 const LogicVRegister& src,
2003 int src_index) {
2004 int laneCount = LaneCountFromFormat(vform);
2005 uint64_t value = src.Uint(vform, src_index);
2006 dst.ClearForWrite(vform);
2007 for (int i = 0; i < laneCount; ++i) {
2008 dst.SetUint(vform, i, value);
2009 }
2010 return dst;
2011 }
2012
2013 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2014 uint64_t imm) {
2015 int laneCount = LaneCountFromFormat(vform);
2016 uint64_t value = imm & MaxUintFromFormat(vform);
2017 dst.ClearForWrite(vform);
2018 for (int i = 0; i < laneCount; ++i) {
2019 dst.SetUint(vform, i, value);
2020 }
2021 return dst;
2022 }
2023
2024 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2025 int dst_index, const LogicVRegister& src,
2026 int src_index) {
2027 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2028 return dst;
2029 }
2030
2031 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2032 int dst_index, uint64_t imm) {
2033 uint64_t value = imm & MaxUintFromFormat(vform);
2034 dst.SetUint(vform, dst_index, value);
2035 return dst;
2036 }
2037
2038 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2039 uint64_t imm) {
2040 int laneCount = LaneCountFromFormat(vform);
2041 dst.ClearForWrite(vform);
2042 for (int i = 0; i < laneCount; ++i) {
2043 dst.SetUint(vform, i, imm);
2044 }
2045 return dst;
2046 }
2047
2048 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2049 uint64_t imm) {
2050 int laneCount = LaneCountFromFormat(vform);
2051 dst.ClearForWrite(vform);
2052 for (int i = 0; i < laneCount; ++i) {
2053 dst.SetUint(vform, i, ~imm);
2054 }
2055 return dst;
2056 }
2057
2058 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2059 const LogicVRegister& src, uint64_t imm) {
2060 uint64_t result[16];
2061 int laneCount = LaneCountFromFormat(vform);
2062 for (int i = 0; i < laneCount; ++i) {
2063 result[i] = src.Uint(vform, i) | imm;
2064 }
2065 dst.SetUintArray(vform, result);
2066 return dst;
2067 }
2068
2069 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2070 const LogicVRegister& src) {
2071 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2072
2073 dst.ClearForWrite(vform);
2074 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2075 dst.SetUint(vform, i, src.Uint(vform_half, i));
2076 }
2077 return dst;
2078 }
2079
2080 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2081 const LogicVRegister& src) {
2082 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2083
2084 dst.ClearForWrite(vform);
2085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2086 dst.SetInt(vform, i, src.Int(vform_half, i));
2087 }
2088 return dst;
2089 }
2090
2091 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2092 const LogicVRegister& src) {
2093 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2094 int lane_count = LaneCountFromFormat(vform);
2095
2096 dst.ClearForWrite(vform);
2097 for (int i = 0; i < lane_count; i++) {
2098 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2099 }
2100 return dst;
2101 }
2102
2103 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2104 const LogicVRegister& src) {
2105 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2106 int lane_count = LaneCountFromFormat(vform);
2107
2108 dst.ClearForWrite(vform);
2109 for (int i = 0; i < lane_count; i++) {
2110 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2111 }
2112 return dst;
2113 }
2114
2115 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2116 const LogicVRegister& src, int shift) {
2117 SimVRegister temp;
2118 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2119 VectorFormat vform_dst = vform;
2120 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2121 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2122 }
2123
2124 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2125 const LogicVRegister& src, int shift) {
2126 SimVRegister temp;
2127 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2128 VectorFormat vformdst = vform;
2129 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2130 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2131 }
2132
2133 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2134 const LogicVRegister& src, int shift) {
2135 SimVRegister temp;
2136 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2137 VectorFormat vformdst = vform;
2138 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2139 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2140 }
2141
2142 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2143 const LogicVRegister& src, int shift) {
2144 SimVRegister temp;
2145 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2146 VectorFormat vformdst = vform;
2147 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2148 return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2149 }
2150
2151 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2152 const LogicVRegister& ind,
2153 bool zero_out_of_bounds,
2154 const LogicVRegister* tab1,
2155 const LogicVRegister* tab2,
2156 const LogicVRegister* tab3,
2157 const LogicVRegister* tab4) {
2158 DCHECK_NOT_NULL(tab1);
2159 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2160 uint64_t result[kMaxLanesPerVector];
2161 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2162 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2163 }
2164 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2165 uint64_t j = ind.Uint(vform, i);
2166 int tab_idx = static_cast<int>(j >> 4);
2167 int j_idx = static_cast<int>(j & 15);
2168 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2169 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2170 }
2171 }
2172 dst.SetUintArray(vform, result);
2173 return dst;
2174 }
2175
2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2177 const LogicVRegister& tab,
2178 const LogicVRegister& ind) {
2179 return Table(vform, dst, ind, true, &tab);
2180 }
2181
2182 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2183 const LogicVRegister& tab,
2184 const LogicVRegister& tab2,
2185 const LogicVRegister& ind) {
2186 return Table(vform, dst, ind, true, &tab, &tab2);
2187 }
2188
2189 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2190 const LogicVRegister& tab,
2191 const LogicVRegister& tab2,
2192 const LogicVRegister& tab3,
2193 const LogicVRegister& ind) {
2194 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2195 }
2196
2197 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2198 const LogicVRegister& tab,
2199 const LogicVRegister& tab2,
2200 const LogicVRegister& tab3,
2201 const LogicVRegister& tab4,
2202 const LogicVRegister& ind) {
2203 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2204 }
2205
2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2207 const LogicVRegister& tab,
2208 const LogicVRegister& ind) {
2209 return Table(vform, dst, ind, false, &tab);
2210 }
2211
2212 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2213 const LogicVRegister& tab,
2214 const LogicVRegister& tab2,
2215 const LogicVRegister& ind) {
2216 return Table(vform, dst, ind, false, &tab, &tab2);
2217 }
2218
2219 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2220 const LogicVRegister& tab,
2221 const LogicVRegister& tab2,
2222 const LogicVRegister& tab3,
2223 const LogicVRegister& ind) {
2224 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2225 }
2226
2227 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2228 const LogicVRegister& tab,
2229 const LogicVRegister& tab2,
2230 const LogicVRegister& tab3,
2231 const LogicVRegister& tab4,
2232 const LogicVRegister& ind) {
2233 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2234 }
2235
2236 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2237 const LogicVRegister& src, int shift) {
2238 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2239 }
2240
2241 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2242 const LogicVRegister& src, int shift) {
2243 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2244 }
2245
2246 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2247 const LogicVRegister& src, int shift) {
2248 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2249 }
2250
2251 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2252 const LogicVRegister& src, int shift) {
2253 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2254 }
2255
2256 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2257 const LogicVRegister& src, int shift) {
2258 SimVRegister temp;
2259 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2260 VectorFormat vformdst = vform;
2261 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2262 return sqxtn(vformdst, dst, shifted_src);
2263 }
2264
2265 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2266 const LogicVRegister& src, int shift) {
2267 SimVRegister temp;
2268 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2269 VectorFormat vformdst = vform;
2270 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2271 return sqxtn(vformdst, dst, shifted_src);
2272 }
2273
2274 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2275 const LogicVRegister& src, int shift) {
2276 SimVRegister temp;
2277 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2278 VectorFormat vformdst = vform;
2279 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2280 return sqxtn(vformdst, dst, shifted_src);
2281 }
2282
2283 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2284 const LogicVRegister& src, int shift) {
2285 SimVRegister temp;
2286 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2287 VectorFormat vformdst = vform;
2288 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2289 return sqxtn(vformdst, dst, shifted_src);
2290 }
2291
2292 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2293 const LogicVRegister& src, int shift) {
2294 SimVRegister temp;
2295 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2296 VectorFormat vformdst = vform;
2297 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2298 return sqxtun(vformdst, dst, shifted_src);
2299 }
2300
2301 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2302 const LogicVRegister& src, int shift) {
2303 SimVRegister temp;
2304 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2305 VectorFormat vformdst = vform;
2306 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2307 return sqxtun(vformdst, dst, shifted_src);
2308 }
2309
2310 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2311 const LogicVRegister& src, int shift) {
2312 SimVRegister temp;
2313 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2314 VectorFormat vformdst = vform;
2315 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2316 return sqxtun(vformdst, dst, shifted_src);
2317 }
2318
2319 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2320 const LogicVRegister& src, int shift) {
2321 SimVRegister temp;
2322 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2323 VectorFormat vformdst = vform;
2324 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2325 return sqxtun(vformdst, dst, shifted_src);
2326 }
2327
2328 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2329 const LogicVRegister& src1,
2330 const LogicVRegister& src2) {
2331 SimVRegister temp1, temp2;
2332 uxtl(vform, temp1, src1);
2333 uxtl(vform, temp2, src2);
2334 add(vform, dst, temp1, temp2);
2335 return dst;
2336 }
2337
2338 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2339 const LogicVRegister& src1,
2340 const LogicVRegister& src2) {
2341 SimVRegister temp1, temp2;
2342 uxtl2(vform, temp1, src1);
2343 uxtl2(vform, temp2, src2);
2344 add(vform, dst, temp1, temp2);
2345 return dst;
2346 }
2347
2348 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2349 const LogicVRegister& src1,
2350 const LogicVRegister& src2) {
2351 SimVRegister temp;
2352 uxtl(vform, temp, src2);
2353 add(vform, dst, src1, temp);
2354 return dst;
2355 }
2356
2357 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2358 const LogicVRegister& src1,
2359 const LogicVRegister& src2) {
2360 SimVRegister temp;
2361 uxtl2(vform, temp, src2);
2362 add(vform, dst, src1, temp);
2363 return dst;
2364 }
2365
2366 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2367 const LogicVRegister& src1,
2368 const LogicVRegister& src2) {
2369 SimVRegister temp1, temp2;
2370 sxtl(vform, temp1, src1);
2371 sxtl(vform, temp2, src2);
2372 add(vform, dst, temp1, temp2);
2373 return dst;
2374 }
2375
2376 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2377 const LogicVRegister& src1,
2378 const LogicVRegister& src2) {
2379 SimVRegister temp1, temp2;
2380 sxtl2(vform, temp1, src1);
2381 sxtl2(vform, temp2, src2);
2382 add(vform, dst, temp1, temp2);
2383 return dst;
2384 }
2385
2386 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2387 const LogicVRegister& src1,
2388 const LogicVRegister& src2) {
2389 SimVRegister temp;
2390 sxtl(vform, temp, src2);
2391 add(vform, dst, src1, temp);
2392 return dst;
2393 }
2394
2395 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2396 const LogicVRegister& src1,
2397 const LogicVRegister& src2) {
2398 SimVRegister temp;
2399 sxtl2(vform, temp, src2);
2400 add(vform, dst, src1, temp);
2401 return dst;
2402 }
2403
2404 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2405 const LogicVRegister& src1,
2406 const LogicVRegister& src2) {
2407 SimVRegister temp1, temp2;
2408 uxtl(vform, temp1, src1);
2409 uxtl(vform, temp2, src2);
2410 sub(vform, dst, temp1, temp2);
2411 return dst;
2412 }
2413
2414 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2415 const LogicVRegister& src1,
2416 const LogicVRegister& src2) {
2417 SimVRegister temp1, temp2;
2418 uxtl2(vform, temp1, src1);
2419 uxtl2(vform, temp2, src2);
2420 sub(vform, dst, temp1, temp2);
2421 return dst;
2422 }
2423
2424 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2425 const LogicVRegister& src1,
2426 const LogicVRegister& src2) {
2427 SimVRegister temp;
2428 uxtl(vform, temp, src2);
2429 sub(vform, dst, src1, temp);
2430 return dst;
2431 }
2432
2433 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2434 const LogicVRegister& src1,
2435 const LogicVRegister& src2) {
2436 SimVRegister temp;
2437 uxtl2(vform, temp, src2);
2438 sub(vform, dst, src1, temp);
2439 return dst;
2440 }
2441
2442 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2443 const LogicVRegister& src1,
2444 const LogicVRegister& src2) {
2445 SimVRegister temp1, temp2;
2446 sxtl(vform, temp1, src1);
2447 sxtl(vform, temp2, src2);
2448 sub(vform, dst, temp1, temp2);
2449 return dst;
2450 }
2451
2452 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2453 const LogicVRegister& src1,
2454 const LogicVRegister& src2) {
2455 SimVRegister temp1, temp2;
2456 sxtl2(vform, temp1, src1);
2457 sxtl2(vform, temp2, src2);
2458 sub(vform, dst, temp1, temp2);
2459 return dst;
2460 }
2461
2462 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2463 const LogicVRegister& src1,
2464 const LogicVRegister& src2) {
2465 SimVRegister temp;
2466 sxtl(vform, temp, src2);
2467 sub(vform, dst, src1, temp);
2468 return dst;
2469 }
2470
2471 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2472 const LogicVRegister& src1,
2473 const LogicVRegister& src2) {
2474 SimVRegister temp;
2475 sxtl2(vform, temp, src2);
2476 sub(vform, dst, src1, temp);
2477 return dst;
2478 }
2479
2480 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2481 const LogicVRegister& src1,
2482 const LogicVRegister& src2) {
2483 SimVRegister temp1, temp2;
2484 uxtl(vform, temp1, src1);
2485 uxtl(vform, temp2, src2);
2486 uaba(vform, dst, temp1, temp2);
2487 return dst;
2488 }
2489
2490 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2491 const LogicVRegister& src1,
2492 const LogicVRegister& src2) {
2493 SimVRegister temp1, temp2;
2494 uxtl2(vform, temp1, src1);
2495 uxtl2(vform, temp2, src2);
2496 uaba(vform, dst, temp1, temp2);
2497 return dst;
2498 }
2499
2500 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2501 const LogicVRegister& src1,
2502 const LogicVRegister& src2) {
2503 SimVRegister temp1, temp2;
2504 sxtl(vform, temp1, src1);
2505 sxtl(vform, temp2, src2);
2506 saba(vform, dst, temp1, temp2);
2507 return dst;
2508 }
2509
2510 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2511 const LogicVRegister& src1,
2512 const LogicVRegister& src2) {
2513 SimVRegister temp1, temp2;
2514 sxtl2(vform, temp1, src1);
2515 sxtl2(vform, temp2, src2);
2516 saba(vform, dst, temp1, temp2);
2517 return dst;
2518 }
2519
2520 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2521 const LogicVRegister& src1,
2522 const LogicVRegister& src2) {
2523 SimVRegister temp1, temp2;
2524 uxtl(vform, temp1, src1);
2525 uxtl(vform, temp2, src2);
2526 AbsDiff(vform, dst, temp1, temp2, false);
2527 return dst;
2528 }
2529
2530 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2531 const LogicVRegister& src1,
2532 const LogicVRegister& src2) {
2533 SimVRegister temp1, temp2;
2534 uxtl2(vform, temp1, src1);
2535 uxtl2(vform, temp2, src2);
2536 AbsDiff(vform, dst, temp1, temp2, false);
2537 return dst;
2538 }
2539
2540 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2541 const LogicVRegister& src1,
2542 const LogicVRegister& src2) {
2543 SimVRegister temp1, temp2;
2544 sxtl(vform, temp1, src1);
2545 sxtl(vform, temp2, src2);
2546 AbsDiff(vform, dst, temp1, temp2, true);
2547 return dst;
2548 }
2549
2550 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2551 const LogicVRegister& src1,
2552 const LogicVRegister& src2) {
2553 SimVRegister temp1, temp2;
2554 sxtl2(vform, temp1, src1);
2555 sxtl2(vform, temp2, src2);
2556 AbsDiff(vform, dst, temp1, temp2, true);
2557 return dst;
2558 }
2559
2560 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2561 const LogicVRegister& src1,
2562 const LogicVRegister& src2) {
2563 SimVRegister temp1, temp2;
2564 uxtl(vform, temp1, src1);
2565 uxtl(vform, temp2, src2);
2566 mul(vform, dst, temp1, temp2);
2567 return dst;
2568 }
2569
2570 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2571 const LogicVRegister& src1,
2572 const LogicVRegister& src2) {
2573 SimVRegister temp1, temp2;
2574 uxtl2(vform, temp1, src1);
2575 uxtl2(vform, temp2, src2);
2576 mul(vform, dst, temp1, temp2);
2577 return dst;
2578 }
2579
2580 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2581 const LogicVRegister& src1,
2582 const LogicVRegister& src2) {
2583 SimVRegister temp1, temp2;
2584 sxtl(vform, temp1, src1);
2585 sxtl(vform, temp2, src2);
2586 mul(vform, dst, temp1, temp2);
2587 return dst;
2588 }
2589
2590 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2591 const LogicVRegister& src1,
2592 const LogicVRegister& src2) {
2593 SimVRegister temp1, temp2;
2594 sxtl2(vform, temp1, src1);
2595 sxtl2(vform, temp2, src2);
2596 mul(vform, dst, temp1, temp2);
2597 return dst;
2598 }
2599
2600 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2601 const LogicVRegister& src1,
2602 const LogicVRegister& src2) {
2603 SimVRegister temp1, temp2;
2604 uxtl(vform, temp1, src1);
2605 uxtl(vform, temp2, src2);
2606 mls(vform, dst, temp1, temp2);
2607 return dst;
2608 }
2609
2610 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2611 const LogicVRegister& src1,
2612 const LogicVRegister& src2) {
2613 SimVRegister temp1, temp2;
2614 uxtl2(vform, temp1, src1);
2615 uxtl2(vform, temp2, src2);
2616 mls(vform, dst, temp1, temp2);
2617 return dst;
2618 }
2619
2620 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2621 const LogicVRegister& src1,
2622 const LogicVRegister& src2) {
2623 SimVRegister temp1, temp2;
2624 sxtl(vform, temp1, src1);
2625 sxtl(vform, temp2, src2);
2626 mls(vform, dst, temp1, temp2);
2627 return dst;
2628 }
2629
2630 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2631 const LogicVRegister& src1,
2632 const LogicVRegister& src2) {
2633 SimVRegister temp1, temp2;
2634 sxtl2(vform, temp1, src1);
2635 sxtl2(vform, temp2, src2);
2636 mls(vform, dst, temp1, temp2);
2637 return dst;
2638 }
2639
2640 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2641 const LogicVRegister& src1,
2642 const LogicVRegister& src2) {
2643 SimVRegister temp1, temp2;
2644 uxtl(vform, temp1, src1);
2645 uxtl(vform, temp2, src2);
2646 mla(vform, dst, temp1, temp2);
2647 return dst;
2648 }
2649
2650 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2651 const LogicVRegister& src1,
2652 const LogicVRegister& src2) {
2653 SimVRegister temp1, temp2;
2654 uxtl2(vform, temp1, src1);
2655 uxtl2(vform, temp2, src2);
2656 mla(vform, dst, temp1, temp2);
2657 return dst;
2658 }
2659
2660 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2661 const LogicVRegister& src1,
2662 const LogicVRegister& src2) {
2663 SimVRegister temp1, temp2;
2664 sxtl(vform, temp1, src1);
2665 sxtl(vform, temp2, src2);
2666 mla(vform, dst, temp1, temp2);
2667 return dst;
2668 }
2669
2670 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2671 const LogicVRegister& src1,
2672 const LogicVRegister& src2) {
2673 SimVRegister temp1, temp2;
2674 sxtl2(vform, temp1, src1);
2675 sxtl2(vform, temp2, src2);
2676 mla(vform, dst, temp1, temp2);
2677 return dst;
2678 }
2679
2680 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2681 const LogicVRegister& src1,
2682 const LogicVRegister& src2) {
2683 SimVRegister temp;
2684 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2685 return add(vform, dst, dst, product).SignedSaturate(vform);
2686 }
2687
2688 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2689 const LogicVRegister& src1,
2690 const LogicVRegister& src2) {
2691 SimVRegister temp;
2692 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2693 return add(vform, dst, dst, product).SignedSaturate(vform);
2694 }
2695
2696 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2697 const LogicVRegister& src1,
2698 const LogicVRegister& src2) {
2699 SimVRegister temp;
2700 LogicVRegister product = sqdmull(vform, temp, src1, src2);
2701 return sub(vform, dst, dst, product).SignedSaturate(vform);
2702 }
2703
2704 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2705 const LogicVRegister& src1,
2706 const LogicVRegister& src2) {
2707 SimVRegister temp;
2708 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2709 return sub(vform, dst, dst, product).SignedSaturate(vform);
2710 }
2711
2712 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2713 const LogicVRegister& src1,
2714 const LogicVRegister& src2) {
2715 SimVRegister temp;
2716 LogicVRegister product = smull(vform, temp, src1, src2);
2717 return add(vform, dst, product, product).SignedSaturate(vform);
2718 }
2719
2720 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2721 const LogicVRegister& src1,
2722 const LogicVRegister& src2) {
2723 SimVRegister temp;
2724 LogicVRegister product = smull2(vform, temp, src1, src2);
2725 return add(vform, dst, product, product).SignedSaturate(vform);
2726 }
2727
2728 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2729 const LogicVRegister& src1,
2730 const LogicVRegister& src2, bool round) {
2731 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2732 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2733 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2734
2735 int esize = LaneSizeInBitsFromFormat(vform);
2736 int round_const = round ? (1 << (esize - 2)) : 0;
2737 int64_t product;
2738
2739 dst.ClearForWrite(vform);
2740 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2741 product = src1.Int(vform, i) * src2.Int(vform, i);
2742 product += round_const;
2743 product = product >> (esize - 1);
2744
2745 if (product > MaxIntFromFormat(vform)) {
2746 product = MaxIntFromFormat(vform);
2747 } else if (product < MinIntFromFormat(vform)) {
2748 product = MinIntFromFormat(vform);
2749 }
2750 dst.SetInt(vform, i, product);
2751 }
2752 return dst;
2753 }
2754
2755 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2756 const LogicVRegister& src1,
2757 const LogicVRegister& src2) {
2758 return sqrdmulh(vform, dst, src1, src2, false);
2759 }
2760
2761 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2762 const LogicVRegister& src1,
2763 const LogicVRegister& src2) {
2764 SimVRegister temp;
2765 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2766 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2767 return dst;
2768 }
2769
2770 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2771 const LogicVRegister& src1,
2772 const LogicVRegister& src2) {
2773 SimVRegister temp;
2774 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2775 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2776 return dst;
2777 }
2778
2779 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2780 const LogicVRegister& src1,
2781 const LogicVRegister& src2) {
2782 SimVRegister temp;
2783 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2784 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2785 return dst;
2786 }
2787
2788 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2789 const LogicVRegister& src1,
2790 const LogicVRegister& src2) {
2791 SimVRegister temp;
2792 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2793 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2794 return dst;
2795 }
2796
2797 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2798 const LogicVRegister& src1,
2799 const LogicVRegister& src2) {
2800 SimVRegister temp;
2801 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2802 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2803 return dst;
2804 }
2805
2806 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2807 const LogicVRegister& src1,
2808 const LogicVRegister& src2) {
2809 SimVRegister temp;
2810 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2811 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2812 return dst;
2813 }
2814
2815 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2816 const LogicVRegister& src1,
2817 const LogicVRegister& src2) {
2818 SimVRegister temp;
2819 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2820 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2821 return dst;
2822 }
2823
2824 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2825 const LogicVRegister& src1,
2826 const LogicVRegister& src2) {
2827 SimVRegister temp;
2828 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2829 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2830 return dst;
2831 }
2832
2833 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2834 const LogicVRegister& src1,
2835 const LogicVRegister& src2) {
2836 uint64_t result[16];
2837 int laneCount = LaneCountFromFormat(vform);
2838 int pairs = laneCount / 2;
2839 for (int i = 0; i < pairs; ++i) {
2840 result[2 * i] = src1.Uint(vform, 2 * i);
2841 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2842 }
2843
2844 dst.SetUintArray(vform, result);
2845 return dst;
2846 }
2847
2848 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2849 const LogicVRegister& src1,
2850 const LogicVRegister& src2) {
2851 uint64_t result[16];
2852 int laneCount = LaneCountFromFormat(vform);
2853 int pairs = laneCount / 2;
2854 for (int i = 0; i < pairs; ++i) {
2855 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2856 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2857 }
2858
2859 dst.SetUintArray(vform, result);
2860 return dst;
2861 }
2862
2863 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2864 const LogicVRegister& src1,
2865 const LogicVRegister& src2) {
2866 uint64_t result[16];
2867 int laneCount = LaneCountFromFormat(vform);
2868 int pairs = laneCount / 2;
2869 for (int i = 0; i < pairs; ++i) {
2870 result[2 * i] = src1.Uint(vform, i);
2871 result[(2 * i) + 1] = src2.Uint(vform, i);
2872 }
2873
2874 dst.SetUintArray(vform, result);
2875 return dst;
2876 }
2877
2878 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2879 const LogicVRegister& src1,
2880 const LogicVRegister& src2) {
2881 uint64_t result[16];
2882 int laneCount = LaneCountFromFormat(vform);
2883 int pairs = laneCount / 2;
2884 for (int i = 0; i < pairs; ++i) {
2885 result[2 * i] = src1.Uint(vform, pairs + i);
2886 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2887 }
2888
2889 dst.SetUintArray(vform, result);
2890 return dst;
2891 }
2892
2893 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2894 const LogicVRegister& src1,
2895 const LogicVRegister& src2) {
2896 uint64_t result[32];
2897 int laneCount = LaneCountFromFormat(vform);
2898 for (int i = 0; i < laneCount; ++i) {
2899 result[i] = src1.Uint(vform, i);
2900 result[laneCount + i] = src2.Uint(vform, i);
2901 }
2902
2903 dst.ClearForWrite(vform);
2904 for (int i = 0; i < laneCount; ++i) {
2905 dst.SetUint(vform, i, result[2 * i]);
2906 }
2907 return dst;
2908 }
2909
2910 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2911 const LogicVRegister& src1,
2912 const LogicVRegister& src2) {
2913 uint64_t result[32];
2914 int laneCount = LaneCountFromFormat(vform);
2915 for (int i = 0; i < laneCount; ++i) {
2916 result[i] = src1.Uint(vform, i);
2917 result[laneCount + i] = src2.Uint(vform, i);
2918 }
2919
2920 dst.ClearForWrite(vform);
2921 for (int i = 0; i < laneCount; ++i) {
2922 dst.SetUint(vform, i, result[(2 * i) + 1]);
2923 }
2924 return dst;
2925 }
2926
2927 template <typename T>
2928 T Simulator::FPAdd(T op1, T op2) {
2929 T result = FPProcessNaNs(op1, op2);
2930 if (std::isnan(result)) return result;
2931
2932 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2933 // inf + -inf returns the default NaN.
2934 FPProcessException();
2935 return FPDefaultNaN<T>();
2936 } else {
2937 // Other cases should be handled by standard arithmetic.
2938 return op1 + op2;
2939 }
2940 }
2941
2942 template <typename T>
2943 T Simulator::FPSub(T op1, T op2) {
2944 // NaNs should be handled elsewhere.
2945 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2946
2947 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2948 // inf - inf returns the default NaN.
2949 FPProcessException();
2950 return FPDefaultNaN<T>();
2951 } else {
2952 // Other cases should be handled by standard arithmetic.
2953 return op1 - op2;
2954 }
2955 }
2956
2957 template <typename T>
2958 T Simulator::FPMul(T op1, T op2) {
2959 // NaNs should be handled elsewhere.
2960 DCHECK(!std::isnan(op1) && !std::isnan(op2));
2961
2962 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2963 // inf * 0.0 returns the default NaN.
2964 FPProcessException();
2965 return FPDefaultNaN<T>();
2966 } else {
2967 // Other cases should be handled by standard arithmetic.
2968 return op1 * op2;
2969 }
2970 }
2971
2972 template <typename T>
2973 T Simulator::FPMulx(T op1, T op2) {
2974 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2975 // inf * 0.0 returns +/-2.0.
2976 T two = 2.0;
2977 return copysign(1.0, op1) * copysign(1.0, op2) * two;
2978 }
2979 return FPMul(op1, op2);
2980 }
2981
2982 template <typename T>
2983 T Simulator::FPMulAdd(T a, T op1, T op2) {
2984 T result = FPProcessNaNs3(a, op1, op2);
2985
2986 T sign_a = copysign(1.0, a);
2987 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
2988 bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2989 bool operation_generates_nan =
2990 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
2991 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
2992 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
2993
2994 if (std::isnan(result)) {
2995 // Generated NaNs override quiet NaNs propagated from a.
2996 if (operation_generates_nan && IsQuietNaN(a)) {
2997 FPProcessException();
2998 return FPDefaultNaN<T>();
2999 } else {
3000 return result;
3001 }
3002 }
3003
3004 // If the operation would produce a NaN, return the default NaN.
3005 if (operation_generates_nan) {
3006 FPProcessException();
3007 return FPDefaultNaN<T>();
3008 }
3009
3010 // Work around broken fma implementations for exact zero results: The sign of
3011 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3012 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3013 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3014 }
3015
3016 result = FusedMultiplyAdd(op1, op2, a);
3017 DCHECK(!std::isnan(result));
3018
3019 // Work around broken fma implementations for rounded zero results: If a is
3020 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3021 if ((a == 0.0) && (result == 0.0)) {
3022 return copysign(0.0, sign_prod);
3023 }
3024
3025 return result;
3026 }
3027
3028 template <typename T>
3029 T Simulator::FPDiv(T op1, T op2) {
3030 // NaNs should be handled elsewhere.
3031 DCHECK(!std::isnan(op1) && !std::isnan(op2));
3032
3033 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3034 // inf / inf and 0.0 / 0.0 return the default NaN.
3035 FPProcessException();
3036 return FPDefaultNaN<T>();
3037 } else {
3038 if (op2 == 0.0) {
3039 FPProcessException();
3040 if (!std::isnan(op1)) {
3041 double op1_sign = copysign(1.0, op1);
3042 double op2_sign = copysign(1.0, op2);
3043 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3044 }
3045 }
3046
3047 // Other cases should be handled by standard arithmetic.
3048 return op1 / op2;
3049 }
3050 }
3051
3052 template <typename T>
3053 T Simulator::FPSqrt(T op) {
3054 if (std::isnan(op)) {
3055 return FPProcessNaN(op);
3056 } else if (op < 0.0) {
3057 FPProcessException();
3058 return FPDefaultNaN<T>();
3059 } else {
3060 return sqrt(op);
3061 }
3062 }
3063
3064 template <typename T>
3065 T Simulator::FPMax(T a, T b) {
3066 T result = FPProcessNaNs(a, b);
3067 if (std::isnan(result)) return result;
3068
3069 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3070 // a and b are zero, and the sign differs: return +0.0.
3071 return 0.0;
3072 } else {
3073 return (a > b) ? a : b;
3074 }
3075 }
3076
3077 template <typename T>
3078 T Simulator::FPMaxNM(T a, T b) {
3079 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3080 a = kFP64NegativeInfinity;
3081 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3082 b = kFP64NegativeInfinity;
3083 }
3084
3085 T result = FPProcessNaNs(a, b);
3086 return std::isnan(result) ? result : FPMax(a, b);
3087 }
3088
3089 template <typename T>
3090 T Simulator::FPMin(T a, T b) {
3091 T result = FPProcessNaNs(a, b);
3092 if (std::isnan(result)) return result;
3093
3094 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3095 // a and b are zero, and the sign differs: return -0.0.
3096 return -0.0;
3097 } else {
3098 return (a < b) ? a : b;
3099 }
3100 }
3101
3102 template <typename T>
3103 T Simulator::FPMinNM(T a, T b) {
3104 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3105 a = kFP64PositiveInfinity;
3106 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3107 b = kFP64PositiveInfinity;
3108 }
3109
3110 T result = FPProcessNaNs(a, b);
3111 return std::isnan(result) ? result : FPMin(a, b);
3112 }
3113
3114 template <typename T>
3115 T Simulator::FPRecipStepFused(T op1, T op2) {
3116 const T two = 2.0;
3117 if ((std::isinf(op1) && (op2 == 0.0)) ||
3118 ((op1 == 0.0) && (std::isinf(op2)))) {
3119 return two;
3120 } else if (std::isinf(op1) || std::isinf(op2)) {
3121 // Return +inf if signs match, otherwise -inf.
3122 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3123 : kFP64NegativeInfinity;
3124 } else {
3125 return FusedMultiplyAdd(op1, op2, two);
3126 }
3127 }
3128
3129 template <typename T>
3130 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3131 const T one_point_five = 1.5;
3132 const T two = 2.0;
3133
3134 if ((std::isinf(op1) && (op2 == 0.0)) ||
3135 ((op1 == 0.0) && (std::isinf(op2)))) {
3136 return one_point_five;
3137 } else if (std::isinf(op1) || std::isinf(op2)) {
3138 // Return +inf if signs match, otherwise -inf.
3139 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3140 : kFP64NegativeInfinity;
3141 } else {
3142 // The multiply-add-halve operation must be fully fused, so avoid interim
3143 // rounding by checking which operand can be losslessly divided by two
3144 // before doing the multiply-add.
3145 if (std::isnormal(op1 / two)) {
3146 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3147 } else if (std::isnormal(op2 / two)) {
3148 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3149 } else {
3150 // Neither operand is normal after halving: the result is dominated by
3151 // the addition term, so just return that.
3152 return one_point_five;
3153 }
3154 }
3155 }
3156
3157 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3158 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3159 (value == kFP64NegativeInfinity)) {
3160 return value;
3161 } else if (std::isnan(value)) {
3162 return FPProcessNaN(value);
3163 }
3164
3165 double int_result = std::floor(value);
3166 double error = value - int_result;
3167 switch (round_mode) {
3168 case FPTieAway: {
3169 // Take care of correctly handling the range ]-0.5, -0.0], which must
3170 // yield -0.0.
3171 if ((-0.5 < value) && (value < 0.0)) {
3172 int_result = -0.0;
3173
3174 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3175 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3176 // result is positive, round up.
3177 int_result++;
3178 }
3179 break;
3180 }
3181 case FPTieEven: {
3182 // Take care of correctly handling the range [-0.5, -0.0], which must
3183 // yield -0.0.
3184 if ((-0.5 <= value) && (value < 0.0)) {
3185 int_result = -0.0;
3186
3187 // If the error is greater than 0.5, or is equal to 0.5 and the integer
3188 // result is odd, round up.
3189 } else if ((error > 0.5) ||
3190 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3191 int_result++;
3192 }
3193 break;
3194 }
3195 case FPZero: {
3196 // If value>0 then we take floor(value)
3197 // otherwise, ceil(value).
3198 if (value < 0) {
3199 int_result = ceil(value);
3200 }
3201 break;
3202 }
3203 case FPNegativeInfinity: {
3204 // We always use floor(value).
3205 break;
3206 }
3207 case FPPositiveInfinity: {
3208 // Take care of correctly handling the range ]-1.0, -0.0], which must
3209 // yield -0.0.
3210 if ((-1.0 < value) && (value < 0.0)) {
3211 int_result = -0.0;
3212
3213 // If the error is non-zero, round up.
3214 } else if (error > 0.0) {
3215 int_result++;
3216 }
3217 break;
3218 }
3219 default:
3220 UNIMPLEMENTED();
3221 }
3222 return int_result;
3223 }
3224
3225 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3226 value = FPRoundInt(value, rmode);
3227 if (value >= kWMaxInt) {
3228 return kWMaxInt;
3229 } else if (value < kWMinInt) {
3230 return kWMinInt;
3231 }
3232 return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3233 }
3234
3235 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3236 value = FPRoundInt(value, rmode);
3237 if (value >= kXMaxInt) {
3238 return kXMaxInt;
3239 } else if (value < kXMinInt) {
3240 return kXMinInt;
3241 }
3242 return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3243 }
3244
3245 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3246 value = FPRoundInt(value, rmode);
3247 if (value >= kWMaxUInt) {
3248 return kWMaxUInt;
3249 } else if (value < 0.0) {
3250 return 0;
3251 }
3252 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3253 }
3254
3255 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3256 value = FPRoundInt(value, rmode);
3257 if (value >= kXMaxUInt) {
3258 return kXMaxUInt;
3259 } else if (value < 0.0) {
3260 return 0;
3261 }
3262 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3263 }
3264
3265 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
3266 template <typename T> \
3267 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3268 const LogicVRegister& src1, \
3269 const LogicVRegister& src2) { \
3270 dst.ClearForWrite(vform); \
3271 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
3272 T op1 = src1.Float<T>(i); \
3273 T op2 = src2.Float<T>(i); \
3274 T result; \
3275 if (PROCNAN) { \
3276 result = FPProcessNaNs(op1, op2); \
3277 if (!std::isnan(result)) { \
3278 result = OP(op1, op2); \
3279 } \
3280 } else { \
3281 result = OP(op1, op2); \
3282 } \
3283 dst.SetFloat(i, result); \
3284 } \
3285 return dst; \
3286 } \
3287 \
3288 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3289 const LogicVRegister& src1, \
3290 const LogicVRegister& src2) { \
3291 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \
3292 FN<float>(vform, dst, src1, src2); \
3293 } else { \
3294 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \
3295 FN<double>(vform, dst, src1, src2); \
3296 } \
3297 return dst; \
3298 }
3299 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3300 #undef DEFINE_NEON_FP_VECTOR_OP
3301
3302 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3303 const LogicVRegister& src1,
3304 const LogicVRegister& src2) {
3305 SimVRegister temp;
3306 LogicVRegister product = fmul(vform, temp, src1, src2);
3307 return fneg(vform, dst, product);
3308 }
3309
3310 template <typename T>
3311 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3312 const LogicVRegister& src1,
3313 const LogicVRegister& src2) {
3314 dst.ClearForWrite(vform);
3315 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3316 T op1 = -src1.Float<T>(i);
3317 T op2 = src2.Float<T>(i);
3318 T result = FPProcessNaNs(op1, op2);
3319 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3320 }
3321 return dst;
3322 }
3323
3324 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3325 const LogicVRegister& src1,
3326 const LogicVRegister& src2) {
3327 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3328 frecps<float>(vform, dst, src1, src2);
3329 } else {
3330 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3331 frecps<double>(vform, dst, src1, src2);
3332 }
3333 return dst;
3334 }
3335
3336 template <typename T>
3337 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3338 const LogicVRegister& src1,
3339 const LogicVRegister& src2) {
3340 dst.ClearForWrite(vform);
3341 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3342 T op1 = -src1.Float<T>(i);
3343 T op2 = src2.Float<T>(i);
3344 T result = FPProcessNaNs(op1, op2);
3345 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3346 }
3347 return dst;
3348 }
3349
3350 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3351 const LogicVRegister& src1,
3352 const LogicVRegister& src2) {
3353 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3354 frsqrts<float>(vform, dst, src1, src2);
3355 } else {
3356 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3357 frsqrts<double>(vform, dst, src1, src2);
3358 }
3359 return dst;
3360 }
3361
3362 template <typename T>
3363 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3364 const LogicVRegister& src1,
3365 const LogicVRegister& src2, Condition cond) {
3366 dst.ClearForWrite(vform);
3367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3368 bool result = false;
3369 T op1 = src1.Float<T>(i);
3370 T op2 = src2.Float<T>(i);
3371 T nan_result = FPProcessNaNs(op1, op2);
3372 if (!std::isnan(nan_result)) {
3373 switch (cond) {
3374 case eq:
3375 result = (op1 == op2);
3376 break;
3377 case ge:
3378 result = (op1 >= op2);
3379 break;
3380 case gt:
3381 result = (op1 > op2);
3382 break;
3383 case le:
3384 result = (op1 <= op2);
3385 break;
3386 case lt:
3387 result = (op1 < op2);
3388 break;
3389 default:
3390 UNREACHABLE();
3391 break;
3392 }
3393 }
3394 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3395 }
3396 return dst;
3397 }
3398
3399 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3400 const LogicVRegister& src1,
3401 const LogicVRegister& src2, Condition cond) {
3402 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3403 fcmp<float>(vform, dst, src1, src2, cond);
3404 } else {
3405 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3406 fcmp<double>(vform, dst, src1, src2, cond);
3407 }
3408 return dst;
3409 }
3410
3411 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3412 const LogicVRegister& src, Condition cond) {
3413 SimVRegister temp;
3414 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3415 LogicVRegister zero_reg =
3416 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3417 fcmp<float>(vform, dst, src, zero_reg, cond);
3418 } else {
3419 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3420 LogicVRegister zero_reg =
3421 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3422 fcmp<double>(vform, dst, src, zero_reg, cond);
3423 }
3424 return dst;
3425 }
3426
3427 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3428 const LogicVRegister& src1,
3429 const LogicVRegister& src2, Condition cond) {
3430 SimVRegister temp1, temp2;
3431 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3432 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3433 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3434 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3435 } else {
3436 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3437 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3438 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3439 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3440 }
3441 return dst;
3442 }
3443
3444 template <typename T>
3445 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3446 const LogicVRegister& src1,
3447 const LogicVRegister& src2) {
3448 dst.ClearForWrite(vform);
3449 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3450 T op1 = src1.Float<T>(i);
3451 T op2 = src2.Float<T>(i);
3452 T acc = dst.Float<T>(i);
3453 T result = FPMulAdd(acc, op1, op2);
3454 dst.SetFloat(i, result);
3455 }
3456 return dst;
3457 }
3458
3459 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3460 const LogicVRegister& src1,
3461 const LogicVRegister& src2) {
3462 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3463 fmla<float>(vform, dst, src1, src2);
3464 } else {
3465 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3466 fmla<double>(vform, dst, src1, src2);
3467 }
3468 return dst;
3469 }
3470
3471 template <typename T>
3472 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3473 const LogicVRegister& src1,
3474 const LogicVRegister& src2) {
3475 dst.ClearForWrite(vform);
3476 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3477 T op1 = -src1.Float<T>(i);
3478 T op2 = src2.Float<T>(i);
3479 T acc = dst.Float<T>(i);
3480 T result = FPMulAdd(acc, op1, op2);
3481 dst.SetFloat(i, result);
3482 }
3483 return dst;
3484 }
3485
3486 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3487 const LogicVRegister& src1,
3488 const LogicVRegister& src2) {
3489 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3490 fmls<float>(vform, dst, src1, src2);
3491 } else {
3492 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3493 fmls<double>(vform, dst, src1, src2);
3494 }
3495 return dst;
3496 }
3497
3498 template <typename T>
3499 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3500 const LogicVRegister& src) {
3501 dst.ClearForWrite(vform);
3502 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3503 T op = src.Float<T>(i);
3504 op = -op;
3505 dst.SetFloat(i, op);
3506 }
3507 return dst;
3508 }
3509
3510 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3511 const LogicVRegister& src) {
3512 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3513 fneg<float>(vform, dst, src);
3514 } else {
3515 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3516 fneg<double>(vform, dst, src);
3517 }
3518 return dst;
3519 }
3520
3521 template <typename T>
3522 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3523 const LogicVRegister& src) {
3524 dst.ClearForWrite(vform);
3525 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3526 T op = src.Float<T>(i);
3527 if (copysign(1.0, op) < 0.0) {
3528 op = -op;
3529 }
3530 dst.SetFloat(i, op);
3531 }
3532 return dst;
3533 }
3534
3535 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3536 const LogicVRegister& src) {
3537 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3538 fabs_<float>(vform, dst, src);
3539 } else {
3540 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3541 fabs_<double>(vform, dst, src);
3542 }
3543 return dst;
3544 }
3545
3546 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3547 const LogicVRegister& src1,
3548 const LogicVRegister& src2) {
3549 SimVRegister temp;
3550 fsub(vform, temp, src1, src2);
3551 fabs_(vform, dst, temp);
3552 return dst;
3553 }
3554
3555 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3556 const LogicVRegister& src) {
3557 dst.ClearForWrite(vform);
3558 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3559 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3560 float result = FPSqrt(src.Float<float>(i));
3561 dst.SetFloat(i, result);
3562 }
3563 } else {
3564 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3565 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3566 double result = FPSqrt(src.Float<double>(i));
3567 dst.SetFloat(i, result);
3568 }
3569 }
3570 return dst;
3571 }
3572
3573 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
3574 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3575 const LogicVRegister& src1, \
3576 const LogicVRegister& src2) { \
3577 SimVRegister temp1, temp2; \
3578 uzp1(vform, temp1, src1, src2); \
3579 uzp2(vform, temp2, src1, src2); \
3580 FN(vform, dst, temp1, temp2); \
3581 return dst; \
3582 } \
3583 \
3584 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3585 const LogicVRegister& src) { \
3586 if (vform == kFormatS) { \
3587 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
3588 dst.SetFloat(0, result); \
3589 } else { \
3590 DCHECK_EQ(vform, kFormatD); \
3591 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
3592 dst.SetFloat(0, result); \
3593 } \
3594 dst.ClearForWrite(vform); \
3595 return dst; \
3596 }
3597 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3598 #undef DEFINE_NEON_FP_PAIR_OP
3599
3600 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3601 const LogicVRegister& src, FPMinMaxOp Op) {
3602 DCHECK_EQ(vform, kFormat4S);
3603 USE(vform);
3604 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3605 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3606 float result = (this->*Op)(result1, result2);
3607 dst.ClearForWrite(kFormatS);
3608 dst.SetFloat<float>(0, result);
3609 return dst;
3610 }
3611
3612 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3613 const LogicVRegister& src) {
3614 return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3615 }
3616
3617 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3618 const LogicVRegister& src) {
3619 return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3620 }
3621
3622 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3623 const LogicVRegister& src) {
3624 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3625 }
3626
3627 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3628 const LogicVRegister& src) {
3629 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3630 }
3631
3632 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3633 const LogicVRegister& src1,
3634 const LogicVRegister& src2, int index) {
3635 dst.ClearForWrite(vform);
3636 SimVRegister temp;
3637 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3638 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3639 fmul<float>(vform, dst, src1, index_reg);
3640 } else {
3641 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3642 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3643 fmul<double>(vform, dst, src1, index_reg);
3644 }
3645 return dst;
3646 }
3647
3648 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3649 const LogicVRegister& src1,
3650 const LogicVRegister& src2, int index) {
3651 dst.ClearForWrite(vform);
3652 SimVRegister temp;
3653 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3654 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3655 fmla<float>(vform, dst, src1, index_reg);
3656 } else {
3657 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3658 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3659 fmla<double>(vform, dst, src1, index_reg);
3660 }
3661 return dst;
3662 }
3663
3664 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3665 const LogicVRegister& src1,
3666 const LogicVRegister& src2, int index) {
3667 dst.ClearForWrite(vform);
3668 SimVRegister temp;
3669 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3670 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3671 fmls<float>(vform, dst, src1, index_reg);
3672 } else {
3673 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3674 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3675 fmls<double>(vform, dst, src1, index_reg);
3676 }
3677 return dst;
3678 }
3679
3680 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3681 const LogicVRegister& src1,
3682 const LogicVRegister& src2, int index) {
3683 dst.ClearForWrite(vform);
3684 SimVRegister temp;
3685 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3686 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3687 fmulx<float>(vform, dst, src1, index_reg);
3688
3689 } else {
3690 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3691 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3692 fmulx<double>(vform, dst, src1, index_reg);
3693 }
3694 return dst;
3695 }
3696
3697 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3698 const LogicVRegister& src,
3699 FPRounding rounding_mode,
3700 bool inexact_exception) {
3701 dst.ClearForWrite(vform);
3702 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3703 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3704 float input = src.Float<float>(i);
3705 float rounded = FPRoundInt(input, rounding_mode);
3706 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3707 FPProcessException();
3708 }
3709 dst.SetFloat<float>(i, rounded);
3710 }
3711 } else {
3712 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3713 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3714 double input = src.Float<double>(i);
3715 double rounded = FPRoundInt(input, rounding_mode);
3716 if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3717 FPProcessException();
3718 }
3719 dst.SetFloat<double>(i, rounded);
3720 }
3721 }
3722 return dst;
3723 }
3724
3725 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3726 const LogicVRegister& src,
3727 FPRounding rounding_mode, int fbits) {
3728 dst.ClearForWrite(vform);
3729 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3730 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3731 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3732 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3733 }
3734 } else {
3735 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3736 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3737 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3738 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3739 }
3740 }
3741 return dst;
3742 }
3743
3744 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3745 const LogicVRegister& src,
3746 FPRounding rounding_mode, int fbits) {
3747 dst.ClearForWrite(vform);
3748 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3749 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3750 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3751 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3752 }
3753 } else {
3754 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3755 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3756 double op = src.Float<double>(i) * std::pow(2.0, fbits);
3757 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3758 }
3759 }
3760 return dst;
3761 }
3762
3763 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3764 const LogicVRegister& src) {
3765 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3766 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3767 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3768 }
3769 } else {
3770 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3771 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3772 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3773 }
3774 }
3775 return dst;
3776 }
3777
3778 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3779 const LogicVRegister& src) {
3780 int lane_count = LaneCountFromFormat(vform);
3781 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3782 for (int i = 0; i < lane_count; i++) {
3783 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3784 }
3785 } else {
3786 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3787 for (int i = 0; i < lane_count; i++) {
3788 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3789 }
3790 }
3791 return dst;
3792 }
3793
3794 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3795 const LogicVRegister& src) {
3796 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3797 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3798 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3799 }
3800 } else {
3801 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3802 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3803 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3804 }
3805 }
3806 return dst;
3807 }
3808
3809 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3810 const LogicVRegister& src) {
3811 int lane_count = LaneCountFromFormat(vform) / 2;
3812 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3813 for (int i = lane_count - 1; i >= 0; i--) {
3814 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3815 }
3816 } else {
3817 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3818 for (int i = lane_count - 1; i >= 0; i--) {
3819 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3820 }
3821 }
3822 return dst;
3823 }
3824
3825 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3826 const LogicVRegister& src) {
3827 dst.ClearForWrite(vform);
3828 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3829 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3830 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3831 }
3832 return dst;
3833 }
3834
3835 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3836 const LogicVRegister& src) {
3837 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3838 int lane_count = LaneCountFromFormat(vform) / 2;
3839 for (int i = lane_count - 1; i >= 0; i--) {
3840 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3841 }
3842 return dst;
3843 }
3844
3845 // Based on reference C function recip_sqrt_estimate from ARM ARM.
3846 double Simulator::recip_sqrt_estimate(double a) {
3847 int q0, q1, s;
3848 double r;
3849 if (a < 0.5) {
3850 q0 = static_cast<int>(a * 512.0);
3851 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3852 } else {
3853 q1 = static_cast<int>(a * 256.0);
3854 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3855 }
3856 s = static_cast<int>(256.0 * r + 0.5);
3857 return static_cast<double>(s) / 256.0;
3858 }
3859
3860 namespace {
3861
3862 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3863 return unsigned_bitextract_64(start_bit, end_bit, val);
3864 }
3865
3866 } // anonymous namespace
3867
3868 template <typename T>
3869 T Simulator::FPRecipSqrtEstimate(T op) {
3870 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3871 "T must be a float or double");
3872
3873 if (std::isnan(op)) {
3874 return FPProcessNaN(op);
3875 } else if (op == 0.0) {
3876 if (copysign(1.0, op) < 0.0) {
3877 return kFP64NegativeInfinity;
3878 } else {
3879 return kFP64PositiveInfinity;
3880 }
3881 } else if (copysign(1.0, op) < 0.0) {
3882 FPProcessException();
3883 return FPDefaultNaN<T>();
3884 } else if (std::isinf(op)) {
3885 return 0.0;
3886 } else {
3887 uint64_t fraction;
3888 int32_t exp, result_exp;
3889
3890 if (sizeof(T) == sizeof(float)) {
3891 exp = static_cast<int32_t>(float_exp(op));
3892 fraction = float_mantissa(op);
3893 fraction <<= 29;
3894 } else {
3895 exp = static_cast<int32_t>(double_exp(op));
3896 fraction = double_mantissa(op);
3897 }
3898
3899 if (exp == 0) {
3900 while (Bits(fraction, 51, 51) == 0) {
3901 fraction = Bits(fraction, 50, 0) << 1;
3902 exp -= 1;
3903 }
3904 fraction = Bits(fraction, 50, 0) << 1;
3905 }
3906
3907 double scaled;
3908 if (Bits(exp, 0, 0) == 0) {
3909 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3910 } else {
3911 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3912 }
3913
3914 if (sizeof(T) == sizeof(float)) {
3915 result_exp = (380 - exp) / 2;
3916 } else {
3917 result_exp = (3068 - exp) / 2;
3918 }
3919
3920 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3921
3922 if (sizeof(T) == sizeof(float)) {
3923 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3924 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3925 return float_pack(0, exp_bits, est_bits);
3926 } else {
3927 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3928 }
3929 }
3930 }
3931
3932 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3933 const LogicVRegister& src) {
3934 dst.ClearForWrite(vform);
3935 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3936 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3937 float input = src.Float<float>(i);
3938 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3939 }
3940 } else {
3941 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3942 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3943 double input = src.Float<double>(i);
3944 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3945 }
3946 }
3947 return dst;
3948 }
3949
3950 template <typename T>
3951 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3952 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3953 "T must be a float or double");
3954 uint32_t sign;
3955
3956 if (sizeof(T) == sizeof(float)) {
3957 sign = float_sign(op);
3958 } else {
3959 sign = double_sign(op);
3960 }
3961
3962 if (std::isnan(op)) {
3963 return FPProcessNaN(op);
3964 } else if (std::isinf(op)) {
3965 return (sign == 1) ? -0.0 : 0.0;
3966 } else if (op == 0.0) {
3967 FPProcessException(); // FPExc_DivideByZero exception.
3968 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3969 } else if (((sizeof(T) == sizeof(float)) &&
3970 (std::fabs(op) < std::pow(2.0, -128.0))) ||
3971 ((sizeof(T) == sizeof(double)) &&
3972 (std::fabs(op) < std::pow(2.0, -1024.0)))) {
3973 bool overflow_to_inf = false;
3974 switch (rounding) {
3975 case FPTieEven:
3976 overflow_to_inf = true;
3977 break;
3978 case FPPositiveInfinity:
3979 overflow_to_inf = (sign == 0);
3980 break;
3981 case FPNegativeInfinity:
3982 overflow_to_inf = (sign == 1);
3983 break;
3984 case FPZero:
3985 overflow_to_inf = false;
3986 break;
3987 default:
3988 break;
3989 }
3990 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
3991 if (overflow_to_inf) {
3992 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3993 } else {
3994 // Return FPMaxNormal(sign).
3995 if (sizeof(T) == sizeof(float)) {
3996 return float_pack(sign, 0xfe, 0x07fffff);
3997 } else {
3998 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
3999 }
4000 }
4001 } else {
4002 uint64_t fraction;
4003 int32_t exp, result_exp;
4004 uint32_t sign;
4005
4006 if (sizeof(T) == sizeof(float)) {
4007 sign = float_sign(op);
4008 exp = static_cast<int32_t>(float_exp(op));
4009 fraction = float_mantissa(op);
4010 fraction <<= 29;
4011 } else {
4012 sign = double_sign(op);
4013 exp = static_cast<int32_t>(double_exp(op));
4014 fraction = double_mantissa(op);
4015 }
4016
4017 if (exp == 0) {
4018 if (Bits(fraction, 51, 51) == 0) {
4019 exp -= 1;
4020 fraction = Bits(fraction, 49, 0) << 2;
4021 } else {
4022 fraction = Bits(fraction, 50, 0) << 1;
4023 }
4024 }
4025
4026 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4027
4028 if (sizeof(T) == sizeof(float)) {
4029 result_exp = 253 - exp;
4030 } else {
4031 result_exp = 2045 - exp;
4032 }
4033
4034 double estimate = recip_estimate(scaled);
4035
4036 fraction = double_mantissa(estimate);
4037 if (result_exp == 0) {
4038 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4039 } else if (result_exp == -1) {
4040 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4041 result_exp = 0;
4042 }
4043 if (sizeof(T) == sizeof(float)) {
4044 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4045 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4046 return float_pack(sign, exp_bits, frac_bits);
4047 } else {
4048 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4049 }
4050 }
4051 }
4052
4053 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4054 const LogicVRegister& src, FPRounding round) {
4055 dst.ClearForWrite(vform);
4056 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4057 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4058 float input = src.Float<float>(i);
4059 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4060 }
4061 } else {
4062 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4063 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4064 double input = src.Float<double>(i);
4065 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4066 }
4067 }
4068 return dst;
4069 }
4070
4071 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4072 const LogicVRegister& src) {
4073 dst.ClearForWrite(vform);
4074 uint64_t operand;
4075 uint32_t result;
4076 double dp_operand, dp_result;
4077 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4078 operand = src.Uint(vform, i);
4079 if (operand <= 0x3FFFFFFF) {
4080 result = 0xFFFFFFFF;
4081 } else {
4082 dp_operand = operand * std::pow(2.0, -32);
4083 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4084 result = static_cast<uint32_t>(dp_result);
4085 }
4086 dst.SetUint(vform, i, result);
4087 }
4088 return dst;
4089 }
4090
4091 // Based on reference C function recip_estimate from ARM ARM.
4092 double Simulator::recip_estimate(double a) {
4093 int q, s;
4094 double r;
4095 q = static_cast<int>(a * 512.0);
4096 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4097 s = static_cast<int>(256.0 * r + 0.5);
4098 return static_cast<double>(s) / 256.0;
4099 }
4100
4101 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4102 const LogicVRegister& src) {
4103 dst.ClearForWrite(vform);
4104 uint64_t operand;
4105 uint32_t result;
4106 double dp_operand, dp_result;
4107 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4108 operand = src.Uint(vform, i);
4109 if (operand <= 0x7FFFFFFF) {
4110 result = 0xFFFFFFFF;
4111 } else {
4112 dp_operand = operand * std::pow(2.0, -32);
4113 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4114 result = static_cast<uint32_t>(dp_result);
4115 }
4116 dst.SetUint(vform, i, result);
4117 }
4118 return dst;
4119 }
4120
4121 template <typename T>
4122 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4123 const LogicVRegister& src) {
4124 dst.ClearForWrite(vform);
4125 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4126 T op = src.Float<T>(i);
4127 T result;
4128 if (std::isnan(op)) {
4129 result = FPProcessNaN(op);
4130 } else {
4131 int exp;
4132 uint32_t sign;
4133 if (sizeof(T) == sizeof(float)) {
4134 sign = float_sign(op);
4135 exp = static_cast<int>(float_exp(op));
4136 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4137 result = float_pack(sign, exp, 0);
4138 } else {
4139 sign = double_sign(op);
4140 exp = static_cast<int>(double_exp(op));
4141 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4142 result = double_pack(sign, exp, 0);
4143 }
4144 }
4145 dst.SetFloat(i, result);
4146 }
4147 return dst;
4148 }
4149
4150 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4151 const LogicVRegister& src) {
4152 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4153 frecpx<float>(vform, dst, src);
4154 } else {
4155 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4156 frecpx<double>(vform, dst, src);
4157 }
4158 return dst;
4159 }
4160
4161 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4162 const LogicVRegister& src, int fbits,
4163 FPRounding round) {
4164 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4165 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4166 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4167 dst.SetFloat<float>(i, result);
4168 } else {
4169 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4170 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4171 dst.SetFloat<double>(i, result);
4172 }
4173 }
4174 return dst;
4175 }
4176
4177 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4178 const LogicVRegister& src, int fbits,
4179 FPRounding round) {
4180 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4181 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4182 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4183 dst.SetFloat<float>(i, result);
4184 } else {
4185 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4186 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4187 dst.SetFloat<double>(i, result);
4188 }
4189 }
4190 return dst;
4191 }
4192
4193 #endif // USE_SIMULATOR
4194
4195 } // namespace internal
4196 } // namespace v8
4197
4198 #endif // V8_TARGET_ARCH_ARM64
OLDNEW
« no previous file with comments | « src/arm64/simulator-arm64.cc ('k') | src/arm64/utils-arm64.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698