OLD | NEW |
| (Empty) |
1 // Copyright 2016 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #if V8_TARGET_ARCH_ARM64 | |
6 | |
7 #include <cmath> | |
8 #include "src/arm64/simulator-arm64.h" | |
9 | |
10 namespace v8 { | |
11 namespace internal { | |
12 | |
13 #if defined(USE_SIMULATOR) | |
14 | |
15 namespace { | |
16 | |
17 // See FPRound for a description of this function. | |
18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa, | |
19 FPRounding round_mode) { | |
20 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>( | |
21 sign, exponent, mantissa, round_mode); | |
22 return bit_cast<double>(bits); | |
23 } | |
24 | |
25 // See FPRound for a description of this function. | |
26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa, | |
27 FPRounding round_mode) { | |
28 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>( | |
29 sign, exponent, mantissa, round_mode); | |
30 return bit_cast<float>(bits); | |
31 } | |
32 | |
33 // See FPRound for a description of this function. | |
34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent, | |
35 uint64_t mantissa, FPRounding round_mode) { | |
36 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>( | |
37 sign, exponent, mantissa, round_mode); | |
38 } | |
39 | |
40 } // namespace | |
41 | |
42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { | |
43 if (src >= 0) { | |
44 return UFixedToDouble(src, fbits, round); | |
45 } else if (src == INT64_MIN) { | |
46 return -UFixedToDouble(src, fbits, round); | |
47 } else { | |
48 return -UFixedToDouble(-src, fbits, round); | |
49 } | |
50 } | |
51 | |
52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { | |
53 // An input of 0 is a special case because the result is effectively | |
54 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. | |
55 if (src == 0) { | |
56 return 0.0; | |
57 } | |
58 | |
59 // Calculate the exponent. The highest significant bit will have the value | |
60 // 2^exponent. | |
61 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); | |
62 const int64_t exponent = highest_significant_bit - fbits; | |
63 | |
64 return FPRoundToDouble(0, exponent, src, round); | |
65 } | |
66 | |
67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { | |
68 if (src >= 0) { | |
69 return UFixedToFloat(src, fbits, round); | |
70 } else if (src == INT64_MIN) { | |
71 return -UFixedToFloat(src, fbits, round); | |
72 } else { | |
73 return -UFixedToFloat(-src, fbits, round); | |
74 } | |
75 } | |
76 | |
77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { | |
78 // An input of 0 is a special case because the result is effectively | |
79 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. | |
80 if (src == 0) { | |
81 return 0.0f; | |
82 } | |
83 | |
84 // Calculate the exponent. The highest significant bit will have the value | |
85 // 2^exponent. | |
86 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); | |
87 const int32_t exponent = highest_significant_bit - fbits; | |
88 | |
89 return FPRoundToFloat(0, exponent, src, round); | |
90 } | |
91 | |
92 double Simulator::FPToDouble(float value) { | |
93 switch (std::fpclassify(value)) { | |
94 case FP_NAN: { | |
95 if (IsSignallingNaN(value)) { | |
96 FPProcessException(); | |
97 } | |
98 if (DN()) return kFP64DefaultNaN; | |
99 | |
100 // Convert NaNs as the processor would: | |
101 // - The sign is propagated. | |
102 // - The mantissa is transferred entirely, except that the top bit is | |
103 // forced to '1', making the result a quiet NaN. The unused (low-order) | |
104 // mantissa bits are set to 0. | |
105 uint32_t raw = bit_cast<uint32_t>(value); | |
106 | |
107 uint64_t sign = raw >> 31; | |
108 uint64_t exponent = (1 << kDoubleExponentBits) - 1; | |
109 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw); | |
110 | |
111 // Unused low-order bits remain zero. | |
112 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits); | |
113 | |
114 // Force a quiet NaN. | |
115 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1)); | |
116 | |
117 return double_pack(sign, exponent, mantissa); | |
118 } | |
119 | |
120 case FP_ZERO: | |
121 case FP_NORMAL: | |
122 case FP_SUBNORMAL: | |
123 case FP_INFINITE: { | |
124 // All other inputs are preserved in a standard cast, because every value | |
125 // representable using an IEEE-754 float is also representable using an | |
126 // IEEE-754 double. | |
127 return static_cast<double>(value); | |
128 } | |
129 } | |
130 | |
131 UNREACHABLE(); | |
132 return kFP64DefaultNaN; | |
133 } | |
134 | |
135 float Simulator::FPToFloat(float16 value) { | |
136 uint32_t sign = value >> 15; | |
137 uint32_t exponent = | |
138 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1, | |
139 kFloat16MantissaBits, value); | |
140 uint32_t mantissa = | |
141 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value); | |
142 | |
143 switch (float16classify(value)) { | |
144 case FP_ZERO: | |
145 return (sign == 0) ? 0.0f : -0.0f; | |
146 | |
147 case FP_INFINITE: | |
148 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; | |
149 | |
150 case FP_SUBNORMAL: { | |
151 // Calculate shift required to put mantissa into the most-significant bits | |
152 // of the destination mantissa. | |
153 int shift = CountLeadingZeros(mantissa << (32 - 10), 32); | |
154 | |
155 // Shift mantissa and discard implicit '1'. | |
156 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; | |
157 mantissa &= (1 << kFloatMantissaBits) - 1; | |
158 | |
159 // Adjust the exponent for the shift applied, and rebias. | |
160 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias); | |
161 break; | |
162 } | |
163 | |
164 case FP_NAN: { | |
165 if (IsSignallingNaN(value)) { | |
166 FPProcessException(); | |
167 } | |
168 if (DN()) return kFP32DefaultNaN; | |
169 | |
170 // Convert NaNs as the processor would: | |
171 // - The sign is propagated. | |
172 // - The mantissa is transferred entirely, except that the top bit is | |
173 // forced to '1', making the result a quiet NaN. The unused (low-order) | |
174 // mantissa bits are set to 0. | |
175 exponent = (1 << kFloatExponentBits) - 1; | |
176 | |
177 // Increase bits in mantissa, making low-order bits 0. | |
178 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); | |
179 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN. | |
180 break; | |
181 } | |
182 | |
183 case FP_NORMAL: { | |
184 // Increase bits in mantissa, making low-order bits 0. | |
185 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); | |
186 | |
187 // Change exponent bias. | |
188 exponent += (kFloatExponentBias - kFloat16ExponentBias); | |
189 break; | |
190 } | |
191 | |
192 default: | |
193 UNREACHABLE(); | |
194 return kFP32DefaultNaN; | |
195 } | |
196 return float_pack(sign, exponent, mantissa); | |
197 } | |
198 | |
199 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { | |
200 // Only the FPTieEven rounding mode is implemented. | |
201 DCHECK_EQ(round_mode, FPTieEven); | |
202 USE(round_mode); | |
203 | |
204 int64_t sign = float_sign(value); | |
205 int64_t exponent = | |
206 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias; | |
207 uint32_t mantissa = float_mantissa(value); | |
208 | |
209 switch (std::fpclassify(value)) { | |
210 case FP_NAN: { | |
211 if (IsSignallingNaN(value)) { | |
212 FPProcessException(); | |
213 } | |
214 if (DN()) return kFP16DefaultNaN; | |
215 | |
216 // Convert NaNs as the processor would: | |
217 // - The sign is propagated. | |
218 // - The mantissa is transferred as much as possible, except that the top | |
219 // bit is forced to '1', making the result a quiet NaN. | |
220 float16 result = | |
221 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
222 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); | |
223 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; | |
224 return result; | |
225 } | |
226 | |
227 case FP_ZERO: | |
228 return (sign == 0) ? 0 : 0x8000; | |
229 | |
230 case FP_INFINITE: | |
231 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
232 | |
233 case FP_NORMAL: | |
234 case FP_SUBNORMAL: { | |
235 // Convert float-to-half as the processor would, assuming that FPCR.FZ | |
236 // (flush-to-zero) is not set. | |
237 | |
238 // Add the implicit '1' bit to the mantissa. | |
239 mantissa += (1 << kFloatMantissaBits); | |
240 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); | |
241 } | |
242 } | |
243 | |
244 UNREACHABLE(); | |
245 return kFP16DefaultNaN; | |
246 } | |
247 | |
248 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { | |
249 // Only the FPTieEven rounding mode is implemented. | |
250 DCHECK_EQ(round_mode, FPTieEven); | |
251 USE(round_mode); | |
252 | |
253 int64_t sign = double_sign(value); | |
254 int64_t exponent = | |
255 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; | |
256 uint64_t mantissa = double_mantissa(value); | |
257 | |
258 switch (std::fpclassify(value)) { | |
259 case FP_NAN: { | |
260 if (IsSignallingNaN(value)) { | |
261 FPProcessException(); | |
262 } | |
263 if (DN()) return kFP16DefaultNaN; | |
264 | |
265 // Convert NaNs as the processor would: | |
266 // - The sign is propagated. | |
267 // - The mantissa is transferred as much as possible, except that the top | |
268 // bit is forced to '1', making the result a quiet NaN. | |
269 float16 result = | |
270 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
271 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); | |
272 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; | |
273 return result; | |
274 } | |
275 | |
276 case FP_ZERO: | |
277 return (sign == 0) ? 0 : 0x8000; | |
278 | |
279 case FP_INFINITE: | |
280 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
281 | |
282 case FP_NORMAL: | |
283 case FP_SUBNORMAL: { | |
284 // Convert double-to-half as the processor would, assuming that FPCR.FZ | |
285 // (flush-to-zero) is not set. | |
286 | |
287 // Add the implicit '1' bit to the mantissa. | |
288 mantissa += (UINT64_C(1) << kDoubleMantissaBits); | |
289 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); | |
290 } | |
291 } | |
292 | |
293 UNREACHABLE(); | |
294 return kFP16DefaultNaN; | |
295 } | |
296 | |
297 float Simulator::FPToFloat(double value, FPRounding round_mode) { | |
298 // Only the FPTieEven rounding mode is implemented. | |
299 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); | |
300 USE(round_mode); | |
301 | |
302 switch (std::fpclassify(value)) { | |
303 case FP_NAN: { | |
304 if (IsSignallingNaN(value)) { | |
305 FPProcessException(); | |
306 } | |
307 if (DN()) return kFP32DefaultNaN; | |
308 | |
309 // Convert NaNs as the processor would: | |
310 // - The sign is propagated. | |
311 // - The mantissa is transferred as much as possible, except that the | |
312 // top bit is forced to '1', making the result a quiet NaN. | |
313 | |
314 uint64_t raw = bit_cast<uint64_t>(value); | |
315 | |
316 uint32_t sign = raw >> 63; | |
317 uint32_t exponent = (1 << 8) - 1; | |
318 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64( | |
319 50, kDoubleMantissaBits - kFloatMantissaBits, raw)); | |
320 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN. | |
321 | |
322 return float_pack(sign, exponent, mantissa); | |
323 } | |
324 | |
325 case FP_ZERO: | |
326 case FP_INFINITE: { | |
327 // In a C++ cast, any value representable in the target type will be | |
328 // unchanged. This is always the case for +/-0.0 and infinities. | |
329 return static_cast<float>(value); | |
330 } | |
331 | |
332 case FP_NORMAL: | |
333 case FP_SUBNORMAL: { | |
334 // Convert double-to-float as the processor would, assuming that FPCR.FZ | |
335 // (flush-to-zero) is not set. | |
336 uint32_t sign = double_sign(value); | |
337 int64_t exponent = | |
338 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; | |
339 uint64_t mantissa = double_mantissa(value); | |
340 if (std::fpclassify(value) == FP_NORMAL) { | |
341 // For normal FP values, add the hidden bit. | |
342 mantissa |= (UINT64_C(1) << kDoubleMantissaBits); | |
343 } | |
344 return FPRoundToFloat(sign, exponent, mantissa, round_mode); | |
345 } | |
346 } | |
347 | |
348 UNREACHABLE(); | |
349 return kFP32DefaultNaN; | |
350 } | |
351 | |
352 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { | |
353 dst.ClearForWrite(vform); | |
354 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
355 dst.ReadUintFromMem(vform, i, addr); | |
356 addr += LaneSizeInBytesFromFormat(vform); | |
357 } | |
358 } | |
359 | |
360 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index, | |
361 uint64_t addr) { | |
362 dst.ReadUintFromMem(vform, index, addr); | |
363 } | |
364 | |
365 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { | |
366 dst.ClearForWrite(vform); | |
367 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
368 dst.ReadUintFromMem(vform, i, addr); | |
369 } | |
370 } | |
371 | |
372 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, | |
373 LogicVRegister dst2, uint64_t addr1) { | |
374 dst1.ClearForWrite(vform); | |
375 dst2.ClearForWrite(vform); | |
376 int esize = LaneSizeInBytesFromFormat(vform); | |
377 uint64_t addr2 = addr1 + esize; | |
378 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
379 dst1.ReadUintFromMem(vform, i, addr1); | |
380 dst2.ReadUintFromMem(vform, i, addr2); | |
381 addr1 += 2 * esize; | |
382 addr2 += 2 * esize; | |
383 } | |
384 } | |
385 | |
386 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, | |
387 LogicVRegister dst2, int index, uint64_t addr1) { | |
388 dst1.ClearForWrite(vform); | |
389 dst2.ClearForWrite(vform); | |
390 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); | |
391 dst1.ReadUintFromMem(vform, index, addr1); | |
392 dst2.ReadUintFromMem(vform, index, addr2); | |
393 } | |
394 | |
395 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1, | |
396 LogicVRegister dst2, uint64_t addr) { | |
397 dst1.ClearForWrite(vform); | |
398 dst2.ClearForWrite(vform); | |
399 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); | |
400 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
401 dst1.ReadUintFromMem(vform, i, addr); | |
402 dst2.ReadUintFromMem(vform, i, addr2); | |
403 } | |
404 } | |
405 | |
406 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, | |
407 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) { | |
408 dst1.ClearForWrite(vform); | |
409 dst2.ClearForWrite(vform); | |
410 dst3.ClearForWrite(vform); | |
411 int esize = LaneSizeInBytesFromFormat(vform); | |
412 uint64_t addr2 = addr1 + esize; | |
413 uint64_t addr3 = addr2 + esize; | |
414 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
415 dst1.ReadUintFromMem(vform, i, addr1); | |
416 dst2.ReadUintFromMem(vform, i, addr2); | |
417 dst3.ReadUintFromMem(vform, i, addr3); | |
418 addr1 += 3 * esize; | |
419 addr2 += 3 * esize; | |
420 addr3 += 3 * esize; | |
421 } | |
422 } | |
423 | |
424 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, | |
425 LogicVRegister dst2, LogicVRegister dst3, int index, | |
426 uint64_t addr1) { | |
427 dst1.ClearForWrite(vform); | |
428 dst2.ClearForWrite(vform); | |
429 dst3.ClearForWrite(vform); | |
430 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); | |
431 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
432 dst1.ReadUintFromMem(vform, index, addr1); | |
433 dst2.ReadUintFromMem(vform, index, addr2); | |
434 dst3.ReadUintFromMem(vform, index, addr3); | |
435 } | |
436 | |
437 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1, | |
438 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) { | |
439 dst1.ClearForWrite(vform); | |
440 dst2.ClearForWrite(vform); | |
441 dst3.ClearForWrite(vform); | |
442 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); | |
443 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
444 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
445 dst1.ReadUintFromMem(vform, i, addr); | |
446 dst2.ReadUintFromMem(vform, i, addr2); | |
447 dst3.ReadUintFromMem(vform, i, addr3); | |
448 } | |
449 } | |
450 | |
451 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, | |
452 LogicVRegister dst2, LogicVRegister dst3, | |
453 LogicVRegister dst4, uint64_t addr1) { | |
454 dst1.ClearForWrite(vform); | |
455 dst2.ClearForWrite(vform); | |
456 dst3.ClearForWrite(vform); | |
457 dst4.ClearForWrite(vform); | |
458 int esize = LaneSizeInBytesFromFormat(vform); | |
459 uint64_t addr2 = addr1 + esize; | |
460 uint64_t addr3 = addr2 + esize; | |
461 uint64_t addr4 = addr3 + esize; | |
462 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
463 dst1.ReadUintFromMem(vform, i, addr1); | |
464 dst2.ReadUintFromMem(vform, i, addr2); | |
465 dst3.ReadUintFromMem(vform, i, addr3); | |
466 dst4.ReadUintFromMem(vform, i, addr4); | |
467 addr1 += 4 * esize; | |
468 addr2 += 4 * esize; | |
469 addr3 += 4 * esize; | |
470 addr4 += 4 * esize; | |
471 } | |
472 } | |
473 | |
474 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, | |
475 LogicVRegister dst2, LogicVRegister dst3, | |
476 LogicVRegister dst4, int index, uint64_t addr1) { | |
477 dst1.ClearForWrite(vform); | |
478 dst2.ClearForWrite(vform); | |
479 dst3.ClearForWrite(vform); | |
480 dst4.ClearForWrite(vform); | |
481 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); | |
482 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
483 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); | |
484 dst1.ReadUintFromMem(vform, index, addr1); | |
485 dst2.ReadUintFromMem(vform, index, addr2); | |
486 dst3.ReadUintFromMem(vform, index, addr3); | |
487 dst4.ReadUintFromMem(vform, index, addr4); | |
488 } | |
489 | |
490 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1, | |
491 LogicVRegister dst2, LogicVRegister dst3, | |
492 LogicVRegister dst4, uint64_t addr) { | |
493 dst1.ClearForWrite(vform); | |
494 dst2.ClearForWrite(vform); | |
495 dst3.ClearForWrite(vform); | |
496 dst4.ClearForWrite(vform); | |
497 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); | |
498 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
499 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); | |
500 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
501 dst1.ReadUintFromMem(vform, i, addr); | |
502 dst2.ReadUintFromMem(vform, i, addr2); | |
503 dst3.ReadUintFromMem(vform, i, addr3); | |
504 dst4.ReadUintFromMem(vform, i, addr4); | |
505 } | |
506 } | |
507 | |
508 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { | |
509 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
510 src.WriteUintToMem(vform, i, addr); | |
511 addr += LaneSizeInBytesFromFormat(vform); | |
512 } | |
513 } | |
514 | |
515 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index, | |
516 uint64_t addr) { | |
517 src.WriteUintToMem(vform, index, addr); | |
518 } | |
519 | |
520 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
521 uint64_t addr) { | |
522 int esize = LaneSizeInBytesFromFormat(vform); | |
523 uint64_t addr2 = addr + esize; | |
524 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
525 dst.WriteUintToMem(vform, i, addr); | |
526 dst2.WriteUintToMem(vform, i, addr2); | |
527 addr += 2 * esize; | |
528 addr2 += 2 * esize; | |
529 } | |
530 } | |
531 | |
532 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
533 int index, uint64_t addr) { | |
534 int esize = LaneSizeInBytesFromFormat(vform); | |
535 dst.WriteUintToMem(vform, index, addr); | |
536 dst2.WriteUintToMem(vform, index, addr + 1 * esize); | |
537 } | |
538 | |
539 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
540 LogicVRegister dst3, uint64_t addr) { | |
541 int esize = LaneSizeInBytesFromFormat(vform); | |
542 uint64_t addr2 = addr + esize; | |
543 uint64_t addr3 = addr2 + esize; | |
544 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
545 dst.WriteUintToMem(vform, i, addr); | |
546 dst2.WriteUintToMem(vform, i, addr2); | |
547 dst3.WriteUintToMem(vform, i, addr3); | |
548 addr += 3 * esize; | |
549 addr2 += 3 * esize; | |
550 addr3 += 3 * esize; | |
551 } | |
552 } | |
553 | |
554 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
555 LogicVRegister dst3, int index, uint64_t addr) { | |
556 int esize = LaneSizeInBytesFromFormat(vform); | |
557 dst.WriteUintToMem(vform, index, addr); | |
558 dst2.WriteUintToMem(vform, index, addr + 1 * esize); | |
559 dst3.WriteUintToMem(vform, index, addr + 2 * esize); | |
560 } | |
561 | |
562 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
563 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) { | |
564 int esize = LaneSizeInBytesFromFormat(vform); | |
565 uint64_t addr2 = addr + esize; | |
566 uint64_t addr3 = addr2 + esize; | |
567 uint64_t addr4 = addr3 + esize; | |
568 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
569 dst.WriteUintToMem(vform, i, addr); | |
570 dst2.WriteUintToMem(vform, i, addr2); | |
571 dst3.WriteUintToMem(vform, i, addr3); | |
572 dst4.WriteUintToMem(vform, i, addr4); | |
573 addr += 4 * esize; | |
574 addr2 += 4 * esize; | |
575 addr3 += 4 * esize; | |
576 addr4 += 4 * esize; | |
577 } | |
578 } | |
579 | |
580 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
581 LogicVRegister dst3, LogicVRegister dst4, int index, | |
582 uint64_t addr) { | |
583 int esize = LaneSizeInBytesFromFormat(vform); | |
584 dst.WriteUintToMem(vform, index, addr); | |
585 dst2.WriteUintToMem(vform, index, addr + 1 * esize); | |
586 dst3.WriteUintToMem(vform, index, addr + 2 * esize); | |
587 dst4.WriteUintToMem(vform, index, addr + 3 * esize); | |
588 } | |
589 | |
590 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, | |
591 const LogicVRegister& src1, | |
592 const LogicVRegister& src2, Condition cond) { | |
593 dst.ClearForWrite(vform); | |
594 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
595 int64_t sa = src1.Int(vform, i); | |
596 int64_t sb = src2.Int(vform, i); | |
597 uint64_t ua = src1.Uint(vform, i); | |
598 uint64_t ub = src2.Uint(vform, i); | |
599 bool result = false; | |
600 switch (cond) { | |
601 case eq: | |
602 result = (ua == ub); | |
603 break; | |
604 case ge: | |
605 result = (sa >= sb); | |
606 break; | |
607 case gt: | |
608 result = (sa > sb); | |
609 break; | |
610 case hi: | |
611 result = (ua > ub); | |
612 break; | |
613 case hs: | |
614 result = (ua >= ub); | |
615 break; | |
616 case lt: | |
617 result = (sa < sb); | |
618 break; | |
619 case le: | |
620 result = (sa <= sb); | |
621 break; | |
622 default: | |
623 UNREACHABLE(); | |
624 break; | |
625 } | |
626 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); | |
627 } | |
628 return dst; | |
629 } | |
630 | |
631 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, | |
632 const LogicVRegister& src1, int imm, | |
633 Condition cond) { | |
634 SimVRegister temp; | |
635 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); | |
636 return cmp(vform, dst, src1, imm_reg, cond); | |
637 } | |
638 | |
639 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst, | |
640 const LogicVRegister& src1, | |
641 const LogicVRegister& src2) { | |
642 dst.ClearForWrite(vform); | |
643 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
644 uint64_t ua = src1.Uint(vform, i); | |
645 uint64_t ub = src2.Uint(vform, i); | |
646 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); | |
647 } | |
648 return dst; | |
649 } | |
650 | |
651 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst, | |
652 const LogicVRegister& src1, | |
653 const LogicVRegister& src2) { | |
654 int lane_size = LaneSizeInBitsFromFormat(vform); | |
655 dst.ClearForWrite(vform); | |
656 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
657 // Test for unsigned saturation. | |
658 uint64_t ua = src1.UintLeftJustified(vform, i); | |
659 uint64_t ub = src2.UintLeftJustified(vform, i); | |
660 uint64_t ur = ua + ub; | |
661 if (ur < ua) { | |
662 dst.SetUnsignedSat(i, true); | |
663 } | |
664 | |
665 // Test for signed saturation. | |
666 bool pos_a = (ua >> 63) == 0; | |
667 bool pos_b = (ub >> 63) == 0; | |
668 bool pos_r = (ur >> 63) == 0; | |
669 // If the signs of the operands are the same, but different from the result, | |
670 // there was an overflow. | |
671 if ((pos_a == pos_b) && (pos_a != pos_r)) { | |
672 dst.SetSignedSat(i, pos_a); | |
673 } | |
674 | |
675 dst.SetInt(vform, i, ur >> (64 - lane_size)); | |
676 } | |
677 return dst; | |
678 } | |
679 | |
680 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, | |
681 const LogicVRegister& src1, | |
682 const LogicVRegister& src2) { | |
683 SimVRegister temp1, temp2; | |
684 uzp1(vform, temp1, src1, src2); | |
685 uzp2(vform, temp2, src1, src2); | |
686 add(vform, dst, temp1, temp2); | |
687 return dst; | |
688 } | |
689 | |
690 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, | |
691 const LogicVRegister& src1, | |
692 const LogicVRegister& src2) { | |
693 SimVRegister temp; | |
694 mul(vform, temp, src1, src2); | |
695 add(vform, dst, dst, temp); | |
696 return dst; | |
697 } | |
698 | |
699 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, | |
700 const LogicVRegister& src1, | |
701 const LogicVRegister& src2) { | |
702 SimVRegister temp; | |
703 mul(vform, temp, src1, src2); | |
704 sub(vform, dst, dst, temp); | |
705 return dst; | |
706 } | |
707 | |
708 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, | |
709 const LogicVRegister& src1, | |
710 const LogicVRegister& src2) { | |
711 dst.ClearForWrite(vform); | |
712 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
713 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); | |
714 } | |
715 return dst; | |
716 } | |
717 | |
718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, | |
719 const LogicVRegister& src1, | |
720 const LogicVRegister& src2, int index) { | |
721 SimVRegister temp; | |
722 VectorFormat indexform = VectorFormatFillQ(vform); | |
723 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
724 } | |
725 | |
726 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, | |
727 const LogicVRegister& src1, | |
728 const LogicVRegister& src2, int index) { | |
729 SimVRegister temp; | |
730 VectorFormat indexform = VectorFormatFillQ(vform); | |
731 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
732 } | |
733 | |
734 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, | |
735 const LogicVRegister& src1, | |
736 const LogicVRegister& src2, int index) { | |
737 SimVRegister temp; | |
738 VectorFormat indexform = VectorFormatFillQ(vform); | |
739 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
740 } | |
741 | |
742 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, | |
743 const LogicVRegister& src1, | |
744 const LogicVRegister& src2, int index) { | |
745 SimVRegister temp; | |
746 VectorFormat indexform = | |
747 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
748 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
749 } | |
750 | |
751 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, | |
752 const LogicVRegister& src1, | |
753 const LogicVRegister& src2, int index) { | |
754 SimVRegister temp; | |
755 VectorFormat indexform = | |
756 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
757 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
758 } | |
759 | |
760 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, | |
761 const LogicVRegister& src1, | |
762 const LogicVRegister& src2, int index) { | |
763 SimVRegister temp; | |
764 VectorFormat indexform = | |
765 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
766 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
767 } | |
768 | |
769 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, | |
770 const LogicVRegister& src1, | |
771 const LogicVRegister& src2, int index) { | |
772 SimVRegister temp; | |
773 VectorFormat indexform = | |
774 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
775 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
776 } | |
777 | |
778 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, | |
779 const LogicVRegister& src1, | |
780 const LogicVRegister& src2, int index) { | |
781 SimVRegister temp; | |
782 VectorFormat indexform = | |
783 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
784 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
785 } | |
786 | |
787 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, | |
788 const LogicVRegister& src1, | |
789 const LogicVRegister& src2, int index) { | |
790 SimVRegister temp; | |
791 VectorFormat indexform = | |
792 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
793 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
794 } | |
795 | |
796 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, | |
797 const LogicVRegister& src1, | |
798 const LogicVRegister& src2, int index) { | |
799 SimVRegister temp; | |
800 VectorFormat indexform = | |
801 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
802 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
803 } | |
804 | |
805 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, | |
806 const LogicVRegister& src1, | |
807 const LogicVRegister& src2, int index) { | |
808 SimVRegister temp; | |
809 VectorFormat indexform = | |
810 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
811 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
812 } | |
813 | |
814 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, | |
815 const LogicVRegister& src1, | |
816 const LogicVRegister& src2, int index) { | |
817 SimVRegister temp; | |
818 VectorFormat indexform = | |
819 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
820 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
821 } | |
822 | |
823 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, | |
824 const LogicVRegister& src1, | |
825 const LogicVRegister& src2, int index) { | |
826 SimVRegister temp; | |
827 VectorFormat indexform = | |
828 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
829 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
830 } | |
831 | |
832 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, | |
833 const LogicVRegister& src1, | |
834 const LogicVRegister& src2, int index) { | |
835 SimVRegister temp; | |
836 VectorFormat indexform = | |
837 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
838 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
839 } | |
840 | |
841 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, | |
842 const LogicVRegister& src1, | |
843 const LogicVRegister& src2, int index) { | |
844 SimVRegister temp; | |
845 VectorFormat indexform = | |
846 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
847 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
848 } | |
849 | |
850 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, | |
851 const LogicVRegister& src1, | |
852 const LogicVRegister& src2, int index) { | |
853 SimVRegister temp; | |
854 VectorFormat indexform = | |
855 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
856 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
857 } | |
858 | |
859 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, | |
860 const LogicVRegister& src1, | |
861 const LogicVRegister& src2, int index) { | |
862 SimVRegister temp; | |
863 VectorFormat indexform = | |
864 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
865 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
866 } | |
867 | |
868 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, | |
869 const LogicVRegister& src1, | |
870 const LogicVRegister& src2, int index) { | |
871 SimVRegister temp; | |
872 VectorFormat indexform = | |
873 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
874 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
875 } | |
876 | |
877 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, | |
878 const LogicVRegister& src1, | |
879 const LogicVRegister& src2, int index) { | |
880 SimVRegister temp; | |
881 VectorFormat indexform = | |
882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
883 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
884 } | |
885 | |
886 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, | |
887 const LogicVRegister& src1, | |
888 const LogicVRegister& src2, int index) { | |
889 SimVRegister temp; | |
890 VectorFormat indexform = | |
891 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
892 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
893 } | |
894 | |
895 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, | |
896 const LogicVRegister& src1, | |
897 const LogicVRegister& src2, int index) { | |
898 SimVRegister temp; | |
899 VectorFormat indexform = | |
900 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
901 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
902 } | |
903 | |
904 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, | |
905 const LogicVRegister& src1, | |
906 const LogicVRegister& src2, int index) { | |
907 SimVRegister temp; | |
908 VectorFormat indexform = VectorFormatFillQ(vform); | |
909 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
910 } | |
911 | |
912 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, | |
913 const LogicVRegister& src1, | |
914 const LogicVRegister& src2, int index) { | |
915 SimVRegister temp; | |
916 VectorFormat indexform = VectorFormatFillQ(vform); | |
917 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
918 } | |
919 | |
920 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { | |
921 uint16_t result = 0; | |
922 uint16_t extended_op2 = op2; | |
923 for (int i = 0; i < 8; ++i) { | |
924 if ((op1 >> i) & 1) { | |
925 result = result ^ (extended_op2 << i); | |
926 } | |
927 } | |
928 return result; | |
929 } | |
930 | |
931 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst, | |
932 const LogicVRegister& src1, | |
933 const LogicVRegister& src2) { | |
934 dst.ClearForWrite(vform); | |
935 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
936 dst.SetUint(vform, i, | |
937 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); | |
938 } | |
939 return dst; | |
940 } | |
941 | |
942 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst, | |
943 const LogicVRegister& src1, | |
944 const LogicVRegister& src2) { | |
945 VectorFormat vform_src = VectorFormatHalfWidth(vform); | |
946 dst.ClearForWrite(vform); | |
947 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
948 dst.SetUint( | |
949 vform, i, | |
950 PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i))); | |
951 } | |
952 return dst; | |
953 } | |
954 | |
955 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst, | |
956 const LogicVRegister& src1, | |
957 const LogicVRegister& src2) { | |
958 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); | |
959 dst.ClearForWrite(vform); | |
960 int lane_count = LaneCountFromFormat(vform); | |
961 for (int i = 0; i < lane_count; i++) { | |
962 dst.SetUint(vform, i, | |
963 PolynomialMult(src1.Uint(vform_src, lane_count + i), | |
964 src2.Uint(vform_src, lane_count + i))); | |
965 } | |
966 return dst; | |
967 } | |
968 | |
969 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst, | |
970 const LogicVRegister& src1, | |
971 const LogicVRegister& src2) { | |
972 int lane_size = LaneSizeInBitsFromFormat(vform); | |
973 dst.ClearForWrite(vform); | |
974 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
975 // Test for unsigned saturation. | |
976 uint64_t ua = src1.UintLeftJustified(vform, i); | |
977 uint64_t ub = src2.UintLeftJustified(vform, i); | |
978 uint64_t ur = ua - ub; | |
979 if (ub > ua) { | |
980 dst.SetUnsignedSat(i, false); | |
981 } | |
982 | |
983 // Test for signed saturation. | |
984 bool pos_a = (ua >> 63) == 0; | |
985 bool pos_b = (ub >> 63) == 0; | |
986 bool pos_r = (ur >> 63) == 0; | |
987 // If the signs of the operands are different, and the sign of the first | |
988 // operand doesn't match the result, there was an overflow. | |
989 if ((pos_a != pos_b) && (pos_a != pos_r)) { | |
990 dst.SetSignedSat(i, pos_a); | |
991 } | |
992 | |
993 dst.SetInt(vform, i, ur >> (64 - lane_size)); | |
994 } | |
995 return dst; | |
996 } | |
997 | |
998 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, | |
999 const LogicVRegister& src1, | |
1000 const LogicVRegister& src2) { | |
1001 dst.ClearForWrite(vform); | |
1002 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1003 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); | |
1004 } | |
1005 return dst; | |
1006 } | |
1007 | |
1008 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, | |
1009 const LogicVRegister& src1, | |
1010 const LogicVRegister& src2) { | |
1011 dst.ClearForWrite(vform); | |
1012 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1013 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); | |
1014 } | |
1015 return dst; | |
1016 } | |
1017 | |
1018 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst, | |
1019 const LogicVRegister& src1, | |
1020 const LogicVRegister& src2) { | |
1021 dst.ClearForWrite(vform); | |
1022 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1023 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); | |
1024 } | |
1025 return dst; | |
1026 } | |
1027 | |
1028 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst, | |
1029 const LogicVRegister& src1, | |
1030 const LogicVRegister& src2) { | |
1031 dst.ClearForWrite(vform); | |
1032 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1033 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); | |
1034 } | |
1035 return dst; | |
1036 } | |
1037 | |
1038 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, | |
1039 const LogicVRegister& src1, | |
1040 const LogicVRegister& src2) { | |
1041 dst.ClearForWrite(vform); | |
1042 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1043 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); | |
1044 } | |
1045 return dst; | |
1046 } | |
1047 | |
1048 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, | |
1049 const LogicVRegister& src, uint64_t imm) { | |
1050 uint64_t result[16]; | |
1051 int laneCount = LaneCountFromFormat(vform); | |
1052 for (int i = 0; i < laneCount; ++i) { | |
1053 result[i] = src.Uint(vform, i) & ~imm; | |
1054 } | |
1055 dst.SetUintArray(vform, result); | |
1056 return dst; | |
1057 } | |
1058 | |
1059 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst, | |
1060 const LogicVRegister& src1, | |
1061 const LogicVRegister& src2) { | |
1062 dst.ClearForWrite(vform); | |
1063 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1064 uint64_t operand1 = dst.Uint(vform, i); | |
1065 uint64_t operand2 = ~src2.Uint(vform, i); | |
1066 uint64_t operand3 = src1.Uint(vform, i); | |
1067 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); | |
1068 dst.SetUint(vform, i, result); | |
1069 } | |
1070 return dst; | |
1071 } | |
1072 | |
1073 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst, | |
1074 const LogicVRegister& src1, | |
1075 const LogicVRegister& src2) { | |
1076 dst.ClearForWrite(vform); | |
1077 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1078 uint64_t operand1 = dst.Uint(vform, i); | |
1079 uint64_t operand2 = src2.Uint(vform, i); | |
1080 uint64_t operand3 = src1.Uint(vform, i); | |
1081 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); | |
1082 dst.SetUint(vform, i, result); | |
1083 } | |
1084 return dst; | |
1085 } | |
1086 | |
1087 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst, | |
1088 const LogicVRegister& src1, | |
1089 const LogicVRegister& src2) { | |
1090 dst.ClearForWrite(vform); | |
1091 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1092 uint64_t operand1 = src2.Uint(vform, i); | |
1093 uint64_t operand2 = dst.Uint(vform, i); | |
1094 uint64_t operand3 = src1.Uint(vform, i); | |
1095 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); | |
1096 dst.SetUint(vform, i, result); | |
1097 } | |
1098 return dst; | |
1099 } | |
1100 | |
1101 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst, | |
1102 const LogicVRegister& src1, | |
1103 const LogicVRegister& src2, bool max) { | |
1104 dst.ClearForWrite(vform); | |
1105 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1106 int64_t src1_val = src1.Int(vform, i); | |
1107 int64_t src2_val = src2.Int(vform, i); | |
1108 int64_t dst_val; | |
1109 if (max) { | |
1110 dst_val = (src1_val > src2_val) ? src1_val : src2_val; | |
1111 } else { | |
1112 dst_val = (src1_val < src2_val) ? src1_val : src2_val; | |
1113 } | |
1114 dst.SetInt(vform, i, dst_val); | |
1115 } | |
1116 return dst; | |
1117 } | |
1118 | |
1119 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst, | |
1120 const LogicVRegister& src1, | |
1121 const LogicVRegister& src2) { | |
1122 return SMinMax(vform, dst, src1, src2, true); | |
1123 } | |
1124 | |
1125 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst, | |
1126 const LogicVRegister& src1, | |
1127 const LogicVRegister& src2) { | |
1128 return SMinMax(vform, dst, src1, src2, false); | |
1129 } | |
1130 | |
1131 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst, | |
1132 const LogicVRegister& src1, | |
1133 const LogicVRegister& src2, bool max) { | |
1134 int lanes = LaneCountFromFormat(vform); | |
1135 int64_t result[kMaxLanesPerVector]; | |
1136 const LogicVRegister* src = &src1; | |
1137 for (int j = 0; j < 2; j++) { | |
1138 for (int i = 0; i < lanes; i += 2) { | |
1139 int64_t first_val = src->Int(vform, i); | |
1140 int64_t second_val = src->Int(vform, i + 1); | |
1141 int64_t dst_val; | |
1142 if (max) { | |
1143 dst_val = (first_val > second_val) ? first_val : second_val; | |
1144 } else { | |
1145 dst_val = (first_val < second_val) ? first_val : second_val; | |
1146 } | |
1147 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); | |
1148 result[(i >> 1) + (j * lanes / 2)] = dst_val; | |
1149 } | |
1150 src = &src2; | |
1151 } | |
1152 dst.SetIntArray(vform, result); | |
1153 return dst; | |
1154 } | |
1155 | |
1156 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst, | |
1157 const LogicVRegister& src1, | |
1158 const LogicVRegister& src2) { | |
1159 return SMinMaxP(vform, dst, src1, src2, true); | |
1160 } | |
1161 | |
1162 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst, | |
1163 const LogicVRegister& src1, | |
1164 const LogicVRegister& src2) { | |
1165 return SMinMaxP(vform, dst, src1, src2, false); | |
1166 } | |
1167 | |
1168 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, | |
1169 const LogicVRegister& src) { | |
1170 DCHECK_EQ(vform, kFormatD); | |
1171 | |
1172 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); | |
1173 dst.ClearForWrite(vform); | |
1174 dst.SetUint(vform, 0, dst_val); | |
1175 return dst; | |
1176 } | |
1177 | |
1178 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst, | |
1179 const LogicVRegister& src) { | |
1180 VectorFormat vform_dst = | |
1181 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); | |
1182 | |
1183 int64_t dst_val = 0; | |
1184 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1185 dst_val += src.Int(vform, i); | |
1186 } | |
1187 | |
1188 dst.ClearForWrite(vform_dst); | |
1189 dst.SetInt(vform_dst, 0, dst_val); | |
1190 return dst; | |
1191 } | |
1192 | |
1193 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst, | |
1194 const LogicVRegister& src) { | |
1195 VectorFormat vform_dst = | |
1196 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); | |
1197 | |
1198 int64_t dst_val = 0; | |
1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1200 dst_val += src.Int(vform, i); | |
1201 } | |
1202 | |
1203 dst.ClearForWrite(vform_dst); | |
1204 dst.SetInt(vform_dst, 0, dst_val); | |
1205 return dst; | |
1206 } | |
1207 | |
1208 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst, | |
1209 const LogicVRegister& src) { | |
1210 VectorFormat vform_dst = | |
1211 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); | |
1212 | |
1213 uint64_t dst_val = 0; | |
1214 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1215 dst_val += src.Uint(vform, i); | |
1216 } | |
1217 | |
1218 dst.ClearForWrite(vform_dst); | |
1219 dst.SetUint(vform_dst, 0, dst_val); | |
1220 return dst; | |
1221 } | |
1222 | |
1223 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst, | |
1224 const LogicVRegister& src, bool max) { | |
1225 int64_t dst_val = max ? INT64_MIN : INT64_MAX; | |
1226 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1227 int64_t src_val = src.Int(vform, i); | |
1228 if (max) { | |
1229 dst_val = (src_val > dst_val) ? src_val : dst_val; | |
1230 } else { | |
1231 dst_val = (src_val < dst_val) ? src_val : dst_val; | |
1232 } | |
1233 } | |
1234 dst.ClearForWrite(ScalarFormatFromFormat(vform)); | |
1235 dst.SetInt(vform, 0, dst_val); | |
1236 return dst; | |
1237 } | |
1238 | |
1239 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, | |
1240 const LogicVRegister& src) { | |
1241 SMinMaxV(vform, dst, src, true); | |
1242 return dst; | |
1243 } | |
1244 | |
1245 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, | |
1246 const LogicVRegister& src) { | |
1247 SMinMaxV(vform, dst, src, false); | |
1248 return dst; | |
1249 } | |
1250 | |
1251 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst, | |
1252 const LogicVRegister& src1, | |
1253 const LogicVRegister& src2, bool max) { | |
1254 dst.ClearForWrite(vform); | |
1255 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1256 uint64_t src1_val = src1.Uint(vform, i); | |
1257 uint64_t src2_val = src2.Uint(vform, i); | |
1258 uint64_t dst_val; | |
1259 if (max) { | |
1260 dst_val = (src1_val > src2_val) ? src1_val : src2_val; | |
1261 } else { | |
1262 dst_val = (src1_val < src2_val) ? src1_val : src2_val; | |
1263 } | |
1264 dst.SetUint(vform, i, dst_val); | |
1265 } | |
1266 return dst; | |
1267 } | |
1268 | |
1269 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst, | |
1270 const LogicVRegister& src1, | |
1271 const LogicVRegister& src2) { | |
1272 return UMinMax(vform, dst, src1, src2, true); | |
1273 } | |
1274 | |
1275 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst, | |
1276 const LogicVRegister& src1, | |
1277 const LogicVRegister& src2) { | |
1278 return UMinMax(vform, dst, src1, src2, false); | |
1279 } | |
1280 | |
1281 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst, | |
1282 const LogicVRegister& src1, | |
1283 const LogicVRegister& src2, bool max) { | |
1284 int lanes = LaneCountFromFormat(vform); | |
1285 uint64_t result[kMaxLanesPerVector]; | |
1286 const LogicVRegister* src = &src1; | |
1287 for (int j = 0; j < 2; j++) { | |
1288 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { | |
1289 uint64_t first_val = src->Uint(vform, i); | |
1290 uint64_t second_val = src->Uint(vform, i + 1); | |
1291 uint64_t dst_val; | |
1292 if (max) { | |
1293 dst_val = (first_val > second_val) ? first_val : second_val; | |
1294 } else { | |
1295 dst_val = (first_val < second_val) ? first_val : second_val; | |
1296 } | |
1297 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); | |
1298 result[(i >> 1) + (j * lanes / 2)] = dst_val; | |
1299 } | |
1300 src = &src2; | |
1301 } | |
1302 dst.SetUintArray(vform, result); | |
1303 return dst; | |
1304 } | |
1305 | |
1306 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst, | |
1307 const LogicVRegister& src1, | |
1308 const LogicVRegister& src2) { | |
1309 return UMinMaxP(vform, dst, src1, src2, true); | |
1310 } | |
1311 | |
1312 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst, | |
1313 const LogicVRegister& src1, | |
1314 const LogicVRegister& src2) { | |
1315 return UMinMaxP(vform, dst, src1, src2, false); | |
1316 } | |
1317 | |
1318 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst, | |
1319 const LogicVRegister& src, bool max) { | |
1320 uint64_t dst_val = max ? 0 : UINT64_MAX; | |
1321 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1322 uint64_t src_val = src.Uint(vform, i); | |
1323 if (max) { | |
1324 dst_val = (src_val > dst_val) ? src_val : dst_val; | |
1325 } else { | |
1326 dst_val = (src_val < dst_val) ? src_val : dst_val; | |
1327 } | |
1328 } | |
1329 dst.ClearForWrite(ScalarFormatFromFormat(vform)); | |
1330 dst.SetUint(vform, 0, dst_val); | |
1331 return dst; | |
1332 } | |
1333 | |
1334 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, | |
1335 const LogicVRegister& src) { | |
1336 UMinMaxV(vform, dst, src, true); | |
1337 return dst; | |
1338 } | |
1339 | |
1340 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, | |
1341 const LogicVRegister& src) { | |
1342 UMinMaxV(vform, dst, src, false); | |
1343 return dst; | |
1344 } | |
1345 | |
1346 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst, | |
1347 const LogicVRegister& src, int shift) { | |
1348 DCHECK_GE(shift, 0); | |
1349 SimVRegister temp; | |
1350 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1351 return ushl(vform, dst, src, shiftreg); | |
1352 } | |
1353 | |
1354 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst, | |
1355 const LogicVRegister& src, int shift) { | |
1356 DCHECK_GE(shift, 0); | |
1357 SimVRegister temp1, temp2; | |
1358 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1359 LogicVRegister extendedreg = sxtl(vform, temp2, src); | |
1360 return sshl(vform, dst, extendedreg, shiftreg); | |
1361 } | |
1362 | |
1363 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst, | |
1364 const LogicVRegister& src, int shift) { | |
1365 DCHECK_GE(shift, 0); | |
1366 SimVRegister temp1, temp2; | |
1367 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1368 LogicVRegister extendedreg = sxtl2(vform, temp2, src); | |
1369 return sshl(vform, dst, extendedreg, shiftreg); | |
1370 } | |
1371 | |
1372 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst, | |
1373 const LogicVRegister& src) { | |
1374 int shift = LaneSizeInBitsFromFormat(vform) / 2; | |
1375 return sshll(vform, dst, src, shift); | |
1376 } | |
1377 | |
1378 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst, | |
1379 const LogicVRegister& src) { | |
1380 int shift = LaneSizeInBitsFromFormat(vform) / 2; | |
1381 return sshll2(vform, dst, src, shift); | |
1382 } | |
1383 | |
1384 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst, | |
1385 const LogicVRegister& src, int shift) { | |
1386 DCHECK_GE(shift, 0); | |
1387 SimVRegister temp1, temp2; | |
1388 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1389 LogicVRegister extendedreg = uxtl(vform, temp2, src); | |
1390 return ushl(vform, dst, extendedreg, shiftreg); | |
1391 } | |
1392 | |
1393 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst, | |
1394 const LogicVRegister& src, int shift) { | |
1395 DCHECK_GE(shift, 0); | |
1396 SimVRegister temp1, temp2; | |
1397 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1398 LogicVRegister extendedreg = uxtl2(vform, temp2, src); | |
1399 return ushl(vform, dst, extendedreg, shiftreg); | |
1400 } | |
1401 | |
1402 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, | |
1403 const LogicVRegister& src, int shift) { | |
1404 dst.ClearForWrite(vform); | |
1405 int laneCount = LaneCountFromFormat(vform); | |
1406 for (int i = 0; i < laneCount; i++) { | |
1407 uint64_t src_lane = src.Uint(vform, i); | |
1408 uint64_t dst_lane = dst.Uint(vform, i); | |
1409 uint64_t shifted = src_lane << shift; | |
1410 uint64_t mask = MaxUintFromFormat(vform) << shift; | |
1411 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); | |
1412 } | |
1413 return dst; | |
1414 } | |
1415 | |
1416 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst, | |
1417 const LogicVRegister& src, int shift) { | |
1418 DCHECK_GE(shift, 0); | |
1419 SimVRegister temp; | |
1420 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1421 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); | |
1422 } | |
1423 | |
1424 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst, | |
1425 const LogicVRegister& src, int shift) { | |
1426 DCHECK_GE(shift, 0); | |
1427 SimVRegister temp; | |
1428 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1429 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); | |
1430 } | |
1431 | |
1432 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst, | |
1433 const LogicVRegister& src, int shift) { | |
1434 DCHECK_GE(shift, 0); | |
1435 SimVRegister temp; | |
1436 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1437 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); | |
1438 } | |
1439 | |
1440 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst, | |
1441 const LogicVRegister& src, int shift) { | |
1442 dst.ClearForWrite(vform); | |
1443 int laneCount = LaneCountFromFormat(vform); | |
1444 DCHECK((shift > 0) && | |
1445 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); | |
1446 for (int i = 0; i < laneCount; i++) { | |
1447 uint64_t src_lane = src.Uint(vform, i); | |
1448 uint64_t dst_lane = dst.Uint(vform, i); | |
1449 uint64_t shifted; | |
1450 uint64_t mask; | |
1451 if (shift == 64) { | |
1452 shifted = 0; | |
1453 mask = 0; | |
1454 } else { | |
1455 shifted = src_lane >> shift; | |
1456 mask = MaxUintFromFormat(vform) >> shift; | |
1457 } | |
1458 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); | |
1459 } | |
1460 return dst; | |
1461 } | |
1462 | |
1463 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst, | |
1464 const LogicVRegister& src, int shift) { | |
1465 DCHECK_GE(shift, 0); | |
1466 SimVRegister temp; | |
1467 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); | |
1468 return ushl(vform, dst, src, shiftreg); | |
1469 } | |
1470 | |
1471 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst, | |
1472 const LogicVRegister& src, int shift) { | |
1473 DCHECK_GE(shift, 0); | |
1474 SimVRegister temp; | |
1475 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); | |
1476 return sshl(vform, dst, src, shiftreg); | |
1477 } | |
1478 | |
1479 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst, | |
1480 const LogicVRegister& src, int shift) { | |
1481 SimVRegister temp; | |
1482 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); | |
1483 return add(vform, dst, dst, shifted_reg); | |
1484 } | |
1485 | |
1486 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst, | |
1487 const LogicVRegister& src, int shift) { | |
1488 SimVRegister temp; | |
1489 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); | |
1490 return add(vform, dst, dst, shifted_reg); | |
1491 } | |
1492 | |
1493 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst, | |
1494 const LogicVRegister& src, int shift) { | |
1495 SimVRegister temp; | |
1496 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); | |
1497 return add(vform, dst, dst, shifted_reg); | |
1498 } | |
1499 | |
1500 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst, | |
1501 const LogicVRegister& src, int shift) { | |
1502 SimVRegister temp; | |
1503 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); | |
1504 return add(vform, dst, dst, shifted_reg); | |
1505 } | |
1506 | |
1507 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, | |
1508 const LogicVRegister& src) { | |
1509 uint64_t result[16]; | |
1510 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1511 int laneCount = LaneCountFromFormat(vform); | |
1512 for (int i = 0; i < laneCount; i++) { | |
1513 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); | |
1514 } | |
1515 | |
1516 dst.SetUintArray(vform, result); | |
1517 return dst; | |
1518 } | |
1519 | |
1520 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, | |
1521 const LogicVRegister& src) { | |
1522 uint64_t result[16]; | |
1523 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1524 int laneCount = LaneCountFromFormat(vform); | |
1525 for (int i = 0; i < laneCount; i++) { | |
1526 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); | |
1527 } | |
1528 | |
1529 dst.SetUintArray(vform, result); | |
1530 return dst; | |
1531 } | |
1532 | |
1533 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, | |
1534 const LogicVRegister& src) { | |
1535 uint64_t result[16]; | |
1536 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1537 int laneCount = LaneCountFromFormat(vform); | |
1538 for (int i = 0; i < laneCount; i++) { | |
1539 uint64_t value = src.Uint(vform, i); | |
1540 result[i] = 0; | |
1541 for (int j = 0; j < laneSizeInBits; j++) { | |
1542 result[i] += (value & 1); | |
1543 value >>= 1; | |
1544 } | |
1545 } | |
1546 | |
1547 dst.SetUintArray(vform, result); | |
1548 return dst; | |
1549 } | |
1550 | |
1551 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst, | |
1552 const LogicVRegister& src1, | |
1553 const LogicVRegister& src2) { | |
1554 dst.ClearForWrite(vform); | |
1555 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1556 int8_t shift_val = src2.Int(vform, i); | |
1557 int64_t lj_src_val = src1.IntLeftJustified(vform, i); | |
1558 | |
1559 // Set signed saturation state. | |
1560 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) && | |
1561 (lj_src_val != 0)) { | |
1562 dst.SetSignedSat(i, lj_src_val >= 0); | |
1563 } | |
1564 | |
1565 // Set unsigned saturation state. | |
1566 if (lj_src_val < 0) { | |
1567 dst.SetUnsignedSat(i, false); | |
1568 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && | |
1569 (lj_src_val != 0)) { | |
1570 dst.SetUnsignedSat(i, true); | |
1571 } | |
1572 | |
1573 int64_t src_val = src1.Int(vform, i); | |
1574 bool src_is_negative = src_val < 0; | |
1575 if (shift_val > 63) { | |
1576 dst.SetInt(vform, i, 0); | |
1577 } else if (shift_val < -63) { | |
1578 dst.SetRounding(i, src_is_negative); | |
1579 dst.SetInt(vform, i, src_is_negative ? -1 : 0); | |
1580 } else { | |
1581 // Use unsigned types for shifts, as behaviour is undefined for signed | |
1582 // lhs. | |
1583 uint64_t usrc_val = static_cast<uint64_t>(src_val); | |
1584 | |
1585 if (shift_val < 0) { | |
1586 // Convert to right shift. | |
1587 shift_val = -shift_val; | |
1588 | |
1589 // Set rounding state by testing most-significant bit shifted out. | |
1590 // Rounding only needed on right shifts. | |
1591 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { | |
1592 dst.SetRounding(i, true); | |
1593 } | |
1594 | |
1595 usrc_val >>= shift_val; | |
1596 | |
1597 if (src_is_negative) { | |
1598 // Simulate sign-extension. | |
1599 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); | |
1600 } | |
1601 } else { | |
1602 usrc_val <<= shift_val; | |
1603 } | |
1604 dst.SetUint(vform, i, usrc_val); | |
1605 } | |
1606 } | |
1607 return dst; | |
1608 } | |
1609 | |
1610 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst, | |
1611 const LogicVRegister& src1, | |
1612 const LogicVRegister& src2) { | |
1613 dst.ClearForWrite(vform); | |
1614 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1615 int8_t shift_val = src2.Int(vform, i); | |
1616 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); | |
1617 | |
1618 // Set saturation state. | |
1619 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) { | |
1620 dst.SetUnsignedSat(i, true); | |
1621 } | |
1622 | |
1623 uint64_t src_val = src1.Uint(vform, i); | |
1624 if ((shift_val > 63) || (shift_val < -64)) { | |
1625 dst.SetUint(vform, i, 0); | |
1626 } else { | |
1627 if (shift_val < 0) { | |
1628 // Set rounding state. Rounding only needed on right shifts. | |
1629 if (((src_val >> (-shift_val - 1)) & 1) == 1) { | |
1630 dst.SetRounding(i, true); | |
1631 } | |
1632 | |
1633 if (shift_val == -64) { | |
1634 src_val = 0; | |
1635 } else { | |
1636 src_val >>= -shift_val; | |
1637 } | |
1638 } else { | |
1639 src_val <<= shift_val; | |
1640 } | |
1641 dst.SetUint(vform, i, src_val); | |
1642 } | |
1643 } | |
1644 return dst; | |
1645 } | |
1646 | |
1647 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst, | |
1648 const LogicVRegister& src) { | |
1649 dst.ClearForWrite(vform); | |
1650 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1651 // Test for signed saturation. | |
1652 int64_t sa = src.Int(vform, i); | |
1653 if (sa == MinIntFromFormat(vform)) { | |
1654 dst.SetSignedSat(i, true); | |
1655 } | |
1656 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); | |
1657 } | |
1658 return dst; | |
1659 } | |
1660 | |
1661 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst, | |
1662 const LogicVRegister& src) { | |
1663 dst.ClearForWrite(vform); | |
1664 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1665 int64_t sa = dst.IntLeftJustified(vform, i); | |
1666 uint64_t ub = src.UintLeftJustified(vform, i); | |
1667 uint64_t ur = sa + ub; | |
1668 | |
1669 int64_t sr = bit_cast<int64_t>(ur); | |
1670 if (sr < sa) { // Test for signed positive saturation. | |
1671 dst.SetInt(vform, i, MaxIntFromFormat(vform)); | |
1672 } else { | |
1673 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); | |
1674 } | |
1675 } | |
1676 return dst; | |
1677 } | |
1678 | |
1679 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst, | |
1680 const LogicVRegister& src) { | |
1681 dst.ClearForWrite(vform); | |
1682 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1683 uint64_t ua = dst.UintLeftJustified(vform, i); | |
1684 int64_t sb = src.IntLeftJustified(vform, i); | |
1685 uint64_t ur = ua + sb; | |
1686 | |
1687 if ((sb > 0) && (ur <= ua)) { | |
1688 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. | |
1689 } else if ((sb < 0) && (ur >= ua)) { | |
1690 dst.SetUint(vform, i, 0); // Negative saturation. | |
1691 } else { | |
1692 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); | |
1693 } | |
1694 } | |
1695 return dst; | |
1696 } | |
1697 | |
1698 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst, | |
1699 const LogicVRegister& src) { | |
1700 dst.ClearForWrite(vform); | |
1701 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1702 // Test for signed saturation. | |
1703 int64_t sa = src.Int(vform, i); | |
1704 if (sa == MinIntFromFormat(vform)) { | |
1705 dst.SetSignedSat(i, true); | |
1706 } | |
1707 if (sa < 0) { | |
1708 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); | |
1709 } else { | |
1710 dst.SetInt(vform, i, sa); | |
1711 } | |
1712 } | |
1713 return dst; | |
1714 } | |
1715 | |
1716 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform, | |
1717 LogicVRegister dst, bool dstIsSigned, | |
1718 const LogicVRegister& src, | |
1719 bool srcIsSigned) { | |
1720 bool upperhalf = false; | |
1721 VectorFormat srcform = kFormatUndefined; | |
1722 int64_t ssrc[8]; | |
1723 uint64_t usrc[8]; | |
1724 | |
1725 switch (dstform) { | |
1726 case kFormat8B: | |
1727 upperhalf = false; | |
1728 srcform = kFormat8H; | |
1729 break; | |
1730 case kFormat16B: | |
1731 upperhalf = true; | |
1732 srcform = kFormat8H; | |
1733 break; | |
1734 case kFormat4H: | |
1735 upperhalf = false; | |
1736 srcform = kFormat4S; | |
1737 break; | |
1738 case kFormat8H: | |
1739 upperhalf = true; | |
1740 srcform = kFormat4S; | |
1741 break; | |
1742 case kFormat2S: | |
1743 upperhalf = false; | |
1744 srcform = kFormat2D; | |
1745 break; | |
1746 case kFormat4S: | |
1747 upperhalf = true; | |
1748 srcform = kFormat2D; | |
1749 break; | |
1750 case kFormatB: | |
1751 upperhalf = false; | |
1752 srcform = kFormatH; | |
1753 break; | |
1754 case kFormatH: | |
1755 upperhalf = false; | |
1756 srcform = kFormatS; | |
1757 break; | |
1758 case kFormatS: | |
1759 upperhalf = false; | |
1760 srcform = kFormatD; | |
1761 break; | |
1762 default: | |
1763 UNIMPLEMENTED(); | |
1764 } | |
1765 | |
1766 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { | |
1767 ssrc[i] = src.Int(srcform, i); | |
1768 usrc[i] = src.Uint(srcform, i); | |
1769 } | |
1770 | |
1771 int offset; | |
1772 if (upperhalf) { | |
1773 offset = LaneCountFromFormat(dstform) / 2; | |
1774 } else { | |
1775 offset = 0; | |
1776 dst.ClearForWrite(dstform); | |
1777 } | |
1778 | |
1779 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { | |
1780 // Test for signed saturation | |
1781 if (ssrc[i] > MaxIntFromFormat(dstform)) { | |
1782 dst.SetSignedSat(offset + i, true); | |
1783 } else if (ssrc[i] < MinIntFromFormat(dstform)) { | |
1784 dst.SetSignedSat(offset + i, false); | |
1785 } | |
1786 | |
1787 // Test for unsigned saturation | |
1788 if (srcIsSigned) { | |
1789 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { | |
1790 dst.SetUnsignedSat(offset + i, true); | |
1791 } else if (ssrc[i] < 0) { | |
1792 dst.SetUnsignedSat(offset + i, false); | |
1793 } | |
1794 } else { | |
1795 if (usrc[i] > MaxUintFromFormat(dstform)) { | |
1796 dst.SetUnsignedSat(offset + i, true); | |
1797 } | |
1798 } | |
1799 | |
1800 int64_t result; | |
1801 if (srcIsSigned) { | |
1802 result = ssrc[i] & MaxUintFromFormat(dstform); | |
1803 } else { | |
1804 result = usrc[i] & MaxUintFromFormat(dstform); | |
1805 } | |
1806 | |
1807 if (dstIsSigned) { | |
1808 dst.SetInt(dstform, offset + i, result); | |
1809 } else { | |
1810 dst.SetUint(dstform, offset + i, result); | |
1811 } | |
1812 } | |
1813 return dst; | |
1814 } | |
1815 | |
1816 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst, | |
1817 const LogicVRegister& src) { | |
1818 return ExtractNarrow(vform, dst, true, src, true); | |
1819 } | |
1820 | |
1821 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst, | |
1822 const LogicVRegister& src) { | |
1823 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform); | |
1824 } | |
1825 | |
1826 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst, | |
1827 const LogicVRegister& src) { | |
1828 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform); | |
1829 } | |
1830 | |
1831 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst, | |
1832 const LogicVRegister& src) { | |
1833 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform); | |
1834 } | |
1835 | |
1836 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst, | |
1837 const LogicVRegister& src1, | |
1838 const LogicVRegister& src2, bool issigned) { | |
1839 dst.ClearForWrite(vform); | |
1840 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1841 if (issigned) { | |
1842 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); | |
1843 sr = sr > 0 ? sr : -sr; | |
1844 dst.SetInt(vform, i, sr); | |
1845 } else { | |
1846 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); | |
1847 sr = sr > 0 ? sr : -sr; | |
1848 dst.SetUint(vform, i, sr); | |
1849 } | |
1850 } | |
1851 return dst; | |
1852 } | |
1853 | |
1854 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst, | |
1855 const LogicVRegister& src1, | |
1856 const LogicVRegister& src2) { | |
1857 SimVRegister temp; | |
1858 dst.ClearForWrite(vform); | |
1859 AbsDiff(vform, temp, src1, src2, true); | |
1860 add(vform, dst, dst, temp); | |
1861 return dst; | |
1862 } | |
1863 | |
1864 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst, | |
1865 const LogicVRegister& src1, | |
1866 const LogicVRegister& src2) { | |
1867 SimVRegister temp; | |
1868 dst.ClearForWrite(vform); | |
1869 AbsDiff(vform, temp, src1, src2, false); | |
1870 add(vform, dst, dst, temp); | |
1871 return dst; | |
1872 } | |
1873 | |
1874 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst, | |
1875 const LogicVRegister& src) { | |
1876 dst.ClearForWrite(vform); | |
1877 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1878 dst.SetUint(vform, i, ~src.Uint(vform, i)); | |
1879 } | |
1880 return dst; | |
1881 } | |
1882 | |
1883 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, | |
1884 const LogicVRegister& src) { | |
1885 uint64_t result[16]; | |
1886 int laneCount = LaneCountFromFormat(vform); | |
1887 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1888 uint64_t reversed_value; | |
1889 uint64_t value; | |
1890 for (int i = 0; i < laneCount; i++) { | |
1891 value = src.Uint(vform, i); | |
1892 reversed_value = 0; | |
1893 for (int j = 0; j < laneSizeInBits; j++) { | |
1894 reversed_value = (reversed_value << 1) | (value & 1); | |
1895 value >>= 1; | |
1896 } | |
1897 result[i] = reversed_value; | |
1898 } | |
1899 | |
1900 dst.SetUintArray(vform, result); | |
1901 return dst; | |
1902 } | |
1903 | |
1904 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, | |
1905 const LogicVRegister& src, int revSize) { | |
1906 uint64_t result[16]; | |
1907 int laneCount = LaneCountFromFormat(vform); | |
1908 int laneSize = LaneSizeInBytesFromFormat(vform); | |
1909 int lanesPerLoop = revSize / laneSize; | |
1910 for (int i = 0; i < laneCount; i += lanesPerLoop) { | |
1911 for (int j = 0; j < lanesPerLoop; j++) { | |
1912 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); | |
1913 } | |
1914 } | |
1915 dst.SetUintArray(vform, result); | |
1916 return dst; | |
1917 } | |
1918 | |
1919 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, | |
1920 const LogicVRegister& src) { | |
1921 return rev(vform, dst, src, 2); | |
1922 } | |
1923 | |
1924 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, | |
1925 const LogicVRegister& src) { | |
1926 return rev(vform, dst, src, 4); | |
1927 } | |
1928 | |
1929 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, | |
1930 const LogicVRegister& src) { | |
1931 return rev(vform, dst, src, 8); | |
1932 } | |
1933 | |
1934 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst, | |
1935 const LogicVRegister& src, bool is_signed, | |
1936 bool do_accumulate) { | |
1937 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); | |
1938 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U); | |
1939 DCHECK_LE(LaneCountFromFormat(vform), 8); | |
1940 | |
1941 uint64_t result[8]; | |
1942 int lane_count = LaneCountFromFormat(vform); | |
1943 for (int i = 0; i < lane_count; i++) { | |
1944 if (is_signed) { | |
1945 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + | |
1946 src.Int(vformsrc, 2 * i + 1)); | |
1947 } else { | |
1948 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); | |
1949 } | |
1950 } | |
1951 | |
1952 dst.ClearForWrite(vform); | |
1953 for (int i = 0; i < lane_count; ++i) { | |
1954 if (do_accumulate) { | |
1955 result[i] += dst.Uint(vform, i); | |
1956 } | |
1957 dst.SetUint(vform, i, result[i]); | |
1958 } | |
1959 | |
1960 return dst; | |
1961 } | |
1962 | |
1963 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst, | |
1964 const LogicVRegister& src) { | |
1965 return addlp(vform, dst, src, true, false); | |
1966 } | |
1967 | |
1968 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst, | |
1969 const LogicVRegister& src) { | |
1970 return addlp(vform, dst, src, false, false); | |
1971 } | |
1972 | |
1973 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst, | |
1974 const LogicVRegister& src) { | |
1975 return addlp(vform, dst, src, true, true); | |
1976 } | |
1977 | |
1978 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst, | |
1979 const LogicVRegister& src) { | |
1980 return addlp(vform, dst, src, false, true); | |
1981 } | |
1982 | |
1983 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst, | |
1984 const LogicVRegister& src1, | |
1985 const LogicVRegister& src2, int index) { | |
1986 uint8_t result[16]; | |
1987 int laneCount = LaneCountFromFormat(vform); | |
1988 for (int i = 0; i < laneCount - index; ++i) { | |
1989 result[i] = src1.Uint(vform, i + index); | |
1990 } | |
1991 for (int i = 0; i < index; ++i) { | |
1992 result[laneCount - index + i] = src2.Uint(vform, i); | |
1993 } | |
1994 dst.ClearForWrite(vform); | |
1995 for (int i = 0; i < laneCount; ++i) { | |
1996 dst.SetUint(vform, i, result[i]); | |
1997 } | |
1998 return dst; | |
1999 } | |
2000 | |
2001 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, | |
2002 const LogicVRegister& src, | |
2003 int src_index) { | |
2004 int laneCount = LaneCountFromFormat(vform); | |
2005 uint64_t value = src.Uint(vform, src_index); | |
2006 dst.ClearForWrite(vform); | |
2007 for (int i = 0; i < laneCount; ++i) { | |
2008 dst.SetUint(vform, i, value); | |
2009 } | |
2010 return dst; | |
2011 } | |
2012 | |
2013 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst, | |
2014 uint64_t imm) { | |
2015 int laneCount = LaneCountFromFormat(vform); | |
2016 uint64_t value = imm & MaxUintFromFormat(vform); | |
2017 dst.ClearForWrite(vform); | |
2018 for (int i = 0; i < laneCount; ++i) { | |
2019 dst.SetUint(vform, i, value); | |
2020 } | |
2021 return dst; | |
2022 } | |
2023 | |
2024 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst, | |
2025 int dst_index, const LogicVRegister& src, | |
2026 int src_index) { | |
2027 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); | |
2028 return dst; | |
2029 } | |
2030 | |
2031 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst, | |
2032 int dst_index, uint64_t imm) { | |
2033 uint64_t value = imm & MaxUintFromFormat(vform); | |
2034 dst.SetUint(vform, dst_index, value); | |
2035 return dst; | |
2036 } | |
2037 | |
2038 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, | |
2039 uint64_t imm) { | |
2040 int laneCount = LaneCountFromFormat(vform); | |
2041 dst.ClearForWrite(vform); | |
2042 for (int i = 0; i < laneCount; ++i) { | |
2043 dst.SetUint(vform, i, imm); | |
2044 } | |
2045 return dst; | |
2046 } | |
2047 | |
2048 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, | |
2049 uint64_t imm) { | |
2050 int laneCount = LaneCountFromFormat(vform); | |
2051 dst.ClearForWrite(vform); | |
2052 for (int i = 0; i < laneCount; ++i) { | |
2053 dst.SetUint(vform, i, ~imm); | |
2054 } | |
2055 return dst; | |
2056 } | |
2057 | |
2058 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, | |
2059 const LogicVRegister& src, uint64_t imm) { | |
2060 uint64_t result[16]; | |
2061 int laneCount = LaneCountFromFormat(vform); | |
2062 for (int i = 0; i < laneCount; ++i) { | |
2063 result[i] = src.Uint(vform, i) | imm; | |
2064 } | |
2065 dst.SetUintArray(vform, result); | |
2066 return dst; | |
2067 } | |
2068 | |
2069 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst, | |
2070 const LogicVRegister& src) { | |
2071 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2072 | |
2073 dst.ClearForWrite(vform); | |
2074 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2075 dst.SetUint(vform, i, src.Uint(vform_half, i)); | |
2076 } | |
2077 return dst; | |
2078 } | |
2079 | |
2080 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst, | |
2081 const LogicVRegister& src) { | |
2082 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2083 | |
2084 dst.ClearForWrite(vform); | |
2085 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2086 dst.SetInt(vform, i, src.Int(vform_half, i)); | |
2087 } | |
2088 return dst; | |
2089 } | |
2090 | |
2091 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst, | |
2092 const LogicVRegister& src) { | |
2093 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2094 int lane_count = LaneCountFromFormat(vform); | |
2095 | |
2096 dst.ClearForWrite(vform); | |
2097 for (int i = 0; i < lane_count; i++) { | |
2098 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); | |
2099 } | |
2100 return dst; | |
2101 } | |
2102 | |
2103 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst, | |
2104 const LogicVRegister& src) { | |
2105 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2106 int lane_count = LaneCountFromFormat(vform); | |
2107 | |
2108 dst.ClearForWrite(vform); | |
2109 for (int i = 0; i < lane_count; i++) { | |
2110 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); | |
2111 } | |
2112 return dst; | |
2113 } | |
2114 | |
2115 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, | |
2116 const LogicVRegister& src, int shift) { | |
2117 SimVRegister temp; | |
2118 VectorFormat vform_src = VectorFormatDoubleWidth(vform); | |
2119 VectorFormat vform_dst = vform; | |
2120 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); | |
2121 return ExtractNarrow(vform_dst, dst, false, shifted_src, false); | |
2122 } | |
2123 | |
2124 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst, | |
2125 const LogicVRegister& src, int shift) { | |
2126 SimVRegister temp; | |
2127 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2128 VectorFormat vformdst = vform; | |
2129 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); | |
2130 return ExtractNarrow(vformdst, dst, false, shifted_src, false); | |
2131 } | |
2132 | |
2133 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst, | |
2134 const LogicVRegister& src, int shift) { | |
2135 SimVRegister temp; | |
2136 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2137 VectorFormat vformdst = vform; | |
2138 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); | |
2139 return ExtractNarrow(vformdst, dst, false, shifted_src, false); | |
2140 } | |
2141 | |
2142 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst, | |
2143 const LogicVRegister& src, int shift) { | |
2144 SimVRegister temp; | |
2145 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2146 VectorFormat vformdst = vform; | |
2147 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); | |
2148 return ExtractNarrow(vformdst, dst, false, shifted_src, false); | |
2149 } | |
2150 | |
2151 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, | |
2152 const LogicVRegister& ind, | |
2153 bool zero_out_of_bounds, | |
2154 const LogicVRegister* tab1, | |
2155 const LogicVRegister* tab2, | |
2156 const LogicVRegister* tab3, | |
2157 const LogicVRegister* tab4) { | |
2158 DCHECK_NOT_NULL(tab1); | |
2159 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; | |
2160 uint64_t result[kMaxLanesPerVector]; | |
2161 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2162 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i); | |
2163 } | |
2164 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2165 uint64_t j = ind.Uint(vform, i); | |
2166 int tab_idx = static_cast<int>(j >> 4); | |
2167 int j_idx = static_cast<int>(j & 15); | |
2168 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { | |
2169 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); | |
2170 } | |
2171 } | |
2172 dst.SetUintArray(vform, result); | |
2173 return dst; | |
2174 } | |
2175 | |
2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2177 const LogicVRegister& tab, | |
2178 const LogicVRegister& ind) { | |
2179 return Table(vform, dst, ind, true, &tab); | |
2180 } | |
2181 | |
2182 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2183 const LogicVRegister& tab, | |
2184 const LogicVRegister& tab2, | |
2185 const LogicVRegister& ind) { | |
2186 return Table(vform, dst, ind, true, &tab, &tab2); | |
2187 } | |
2188 | |
2189 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2190 const LogicVRegister& tab, | |
2191 const LogicVRegister& tab2, | |
2192 const LogicVRegister& tab3, | |
2193 const LogicVRegister& ind) { | |
2194 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); | |
2195 } | |
2196 | |
2197 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2198 const LogicVRegister& tab, | |
2199 const LogicVRegister& tab2, | |
2200 const LogicVRegister& tab3, | |
2201 const LogicVRegister& tab4, | |
2202 const LogicVRegister& ind) { | |
2203 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); | |
2204 } | |
2205 | |
2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2207 const LogicVRegister& tab, | |
2208 const LogicVRegister& ind) { | |
2209 return Table(vform, dst, ind, false, &tab); | |
2210 } | |
2211 | |
2212 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2213 const LogicVRegister& tab, | |
2214 const LogicVRegister& tab2, | |
2215 const LogicVRegister& ind) { | |
2216 return Table(vform, dst, ind, false, &tab, &tab2); | |
2217 } | |
2218 | |
2219 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2220 const LogicVRegister& tab, | |
2221 const LogicVRegister& tab2, | |
2222 const LogicVRegister& tab3, | |
2223 const LogicVRegister& ind) { | |
2224 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); | |
2225 } | |
2226 | |
2227 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2228 const LogicVRegister& tab, | |
2229 const LogicVRegister& tab2, | |
2230 const LogicVRegister& tab3, | |
2231 const LogicVRegister& tab4, | |
2232 const LogicVRegister& ind) { | |
2233 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); | |
2234 } | |
2235 | |
2236 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst, | |
2237 const LogicVRegister& src, int shift) { | |
2238 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); | |
2239 } | |
2240 | |
2241 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst, | |
2242 const LogicVRegister& src, int shift) { | |
2243 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); | |
2244 } | |
2245 | |
2246 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst, | |
2247 const LogicVRegister& src, int shift) { | |
2248 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); | |
2249 } | |
2250 | |
2251 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst, | |
2252 const LogicVRegister& src, int shift) { | |
2253 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); | |
2254 } | |
2255 | |
2256 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst, | |
2257 const LogicVRegister& src, int shift) { | |
2258 SimVRegister temp; | |
2259 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2260 VectorFormat vformdst = vform; | |
2261 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2262 return sqxtn(vformdst, dst, shifted_src); | |
2263 } | |
2264 | |
2265 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst, | |
2266 const LogicVRegister& src, int shift) { | |
2267 SimVRegister temp; | |
2268 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2269 VectorFormat vformdst = vform; | |
2270 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2271 return sqxtn(vformdst, dst, shifted_src); | |
2272 } | |
2273 | |
2274 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst, | |
2275 const LogicVRegister& src, int shift) { | |
2276 SimVRegister temp; | |
2277 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2278 VectorFormat vformdst = vform; | |
2279 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2280 return sqxtn(vformdst, dst, shifted_src); | |
2281 } | |
2282 | |
2283 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst, | |
2284 const LogicVRegister& src, int shift) { | |
2285 SimVRegister temp; | |
2286 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2287 VectorFormat vformdst = vform; | |
2288 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2289 return sqxtn(vformdst, dst, shifted_src); | |
2290 } | |
2291 | |
2292 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst, | |
2293 const LogicVRegister& src, int shift) { | |
2294 SimVRegister temp; | |
2295 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2296 VectorFormat vformdst = vform; | |
2297 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2298 return sqxtun(vformdst, dst, shifted_src); | |
2299 } | |
2300 | |
2301 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst, | |
2302 const LogicVRegister& src, int shift) { | |
2303 SimVRegister temp; | |
2304 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2305 VectorFormat vformdst = vform; | |
2306 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2307 return sqxtun(vformdst, dst, shifted_src); | |
2308 } | |
2309 | |
2310 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst, | |
2311 const LogicVRegister& src, int shift) { | |
2312 SimVRegister temp; | |
2313 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2314 VectorFormat vformdst = vform; | |
2315 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2316 return sqxtun(vformdst, dst, shifted_src); | |
2317 } | |
2318 | |
2319 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst, | |
2320 const LogicVRegister& src, int shift) { | |
2321 SimVRegister temp; | |
2322 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2323 VectorFormat vformdst = vform; | |
2324 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2325 return sqxtun(vformdst, dst, shifted_src); | |
2326 } | |
2327 | |
2328 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst, | |
2329 const LogicVRegister& src1, | |
2330 const LogicVRegister& src2) { | |
2331 SimVRegister temp1, temp2; | |
2332 uxtl(vform, temp1, src1); | |
2333 uxtl(vform, temp2, src2); | |
2334 add(vform, dst, temp1, temp2); | |
2335 return dst; | |
2336 } | |
2337 | |
2338 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst, | |
2339 const LogicVRegister& src1, | |
2340 const LogicVRegister& src2) { | |
2341 SimVRegister temp1, temp2; | |
2342 uxtl2(vform, temp1, src1); | |
2343 uxtl2(vform, temp2, src2); | |
2344 add(vform, dst, temp1, temp2); | |
2345 return dst; | |
2346 } | |
2347 | |
2348 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst, | |
2349 const LogicVRegister& src1, | |
2350 const LogicVRegister& src2) { | |
2351 SimVRegister temp; | |
2352 uxtl(vform, temp, src2); | |
2353 add(vform, dst, src1, temp); | |
2354 return dst; | |
2355 } | |
2356 | |
2357 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst, | |
2358 const LogicVRegister& src1, | |
2359 const LogicVRegister& src2) { | |
2360 SimVRegister temp; | |
2361 uxtl2(vform, temp, src2); | |
2362 add(vform, dst, src1, temp); | |
2363 return dst; | |
2364 } | |
2365 | |
2366 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst, | |
2367 const LogicVRegister& src1, | |
2368 const LogicVRegister& src2) { | |
2369 SimVRegister temp1, temp2; | |
2370 sxtl(vform, temp1, src1); | |
2371 sxtl(vform, temp2, src2); | |
2372 add(vform, dst, temp1, temp2); | |
2373 return dst; | |
2374 } | |
2375 | |
2376 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst, | |
2377 const LogicVRegister& src1, | |
2378 const LogicVRegister& src2) { | |
2379 SimVRegister temp1, temp2; | |
2380 sxtl2(vform, temp1, src1); | |
2381 sxtl2(vform, temp2, src2); | |
2382 add(vform, dst, temp1, temp2); | |
2383 return dst; | |
2384 } | |
2385 | |
2386 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst, | |
2387 const LogicVRegister& src1, | |
2388 const LogicVRegister& src2) { | |
2389 SimVRegister temp; | |
2390 sxtl(vform, temp, src2); | |
2391 add(vform, dst, src1, temp); | |
2392 return dst; | |
2393 } | |
2394 | |
2395 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst, | |
2396 const LogicVRegister& src1, | |
2397 const LogicVRegister& src2) { | |
2398 SimVRegister temp; | |
2399 sxtl2(vform, temp, src2); | |
2400 add(vform, dst, src1, temp); | |
2401 return dst; | |
2402 } | |
2403 | |
2404 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst, | |
2405 const LogicVRegister& src1, | |
2406 const LogicVRegister& src2) { | |
2407 SimVRegister temp1, temp2; | |
2408 uxtl(vform, temp1, src1); | |
2409 uxtl(vform, temp2, src2); | |
2410 sub(vform, dst, temp1, temp2); | |
2411 return dst; | |
2412 } | |
2413 | |
2414 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst, | |
2415 const LogicVRegister& src1, | |
2416 const LogicVRegister& src2) { | |
2417 SimVRegister temp1, temp2; | |
2418 uxtl2(vform, temp1, src1); | |
2419 uxtl2(vform, temp2, src2); | |
2420 sub(vform, dst, temp1, temp2); | |
2421 return dst; | |
2422 } | |
2423 | |
2424 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst, | |
2425 const LogicVRegister& src1, | |
2426 const LogicVRegister& src2) { | |
2427 SimVRegister temp; | |
2428 uxtl(vform, temp, src2); | |
2429 sub(vform, dst, src1, temp); | |
2430 return dst; | |
2431 } | |
2432 | |
2433 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst, | |
2434 const LogicVRegister& src1, | |
2435 const LogicVRegister& src2) { | |
2436 SimVRegister temp; | |
2437 uxtl2(vform, temp, src2); | |
2438 sub(vform, dst, src1, temp); | |
2439 return dst; | |
2440 } | |
2441 | |
2442 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst, | |
2443 const LogicVRegister& src1, | |
2444 const LogicVRegister& src2) { | |
2445 SimVRegister temp1, temp2; | |
2446 sxtl(vform, temp1, src1); | |
2447 sxtl(vform, temp2, src2); | |
2448 sub(vform, dst, temp1, temp2); | |
2449 return dst; | |
2450 } | |
2451 | |
2452 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst, | |
2453 const LogicVRegister& src1, | |
2454 const LogicVRegister& src2) { | |
2455 SimVRegister temp1, temp2; | |
2456 sxtl2(vform, temp1, src1); | |
2457 sxtl2(vform, temp2, src2); | |
2458 sub(vform, dst, temp1, temp2); | |
2459 return dst; | |
2460 } | |
2461 | |
2462 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst, | |
2463 const LogicVRegister& src1, | |
2464 const LogicVRegister& src2) { | |
2465 SimVRegister temp; | |
2466 sxtl(vform, temp, src2); | |
2467 sub(vform, dst, src1, temp); | |
2468 return dst; | |
2469 } | |
2470 | |
2471 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst, | |
2472 const LogicVRegister& src1, | |
2473 const LogicVRegister& src2) { | |
2474 SimVRegister temp; | |
2475 sxtl2(vform, temp, src2); | |
2476 sub(vform, dst, src1, temp); | |
2477 return dst; | |
2478 } | |
2479 | |
2480 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst, | |
2481 const LogicVRegister& src1, | |
2482 const LogicVRegister& src2) { | |
2483 SimVRegister temp1, temp2; | |
2484 uxtl(vform, temp1, src1); | |
2485 uxtl(vform, temp2, src2); | |
2486 uaba(vform, dst, temp1, temp2); | |
2487 return dst; | |
2488 } | |
2489 | |
2490 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst, | |
2491 const LogicVRegister& src1, | |
2492 const LogicVRegister& src2) { | |
2493 SimVRegister temp1, temp2; | |
2494 uxtl2(vform, temp1, src1); | |
2495 uxtl2(vform, temp2, src2); | |
2496 uaba(vform, dst, temp1, temp2); | |
2497 return dst; | |
2498 } | |
2499 | |
2500 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst, | |
2501 const LogicVRegister& src1, | |
2502 const LogicVRegister& src2) { | |
2503 SimVRegister temp1, temp2; | |
2504 sxtl(vform, temp1, src1); | |
2505 sxtl(vform, temp2, src2); | |
2506 saba(vform, dst, temp1, temp2); | |
2507 return dst; | |
2508 } | |
2509 | |
2510 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst, | |
2511 const LogicVRegister& src1, | |
2512 const LogicVRegister& src2) { | |
2513 SimVRegister temp1, temp2; | |
2514 sxtl2(vform, temp1, src1); | |
2515 sxtl2(vform, temp2, src2); | |
2516 saba(vform, dst, temp1, temp2); | |
2517 return dst; | |
2518 } | |
2519 | |
2520 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst, | |
2521 const LogicVRegister& src1, | |
2522 const LogicVRegister& src2) { | |
2523 SimVRegister temp1, temp2; | |
2524 uxtl(vform, temp1, src1); | |
2525 uxtl(vform, temp2, src2); | |
2526 AbsDiff(vform, dst, temp1, temp2, false); | |
2527 return dst; | |
2528 } | |
2529 | |
2530 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst, | |
2531 const LogicVRegister& src1, | |
2532 const LogicVRegister& src2) { | |
2533 SimVRegister temp1, temp2; | |
2534 uxtl2(vform, temp1, src1); | |
2535 uxtl2(vform, temp2, src2); | |
2536 AbsDiff(vform, dst, temp1, temp2, false); | |
2537 return dst; | |
2538 } | |
2539 | |
2540 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst, | |
2541 const LogicVRegister& src1, | |
2542 const LogicVRegister& src2) { | |
2543 SimVRegister temp1, temp2; | |
2544 sxtl(vform, temp1, src1); | |
2545 sxtl(vform, temp2, src2); | |
2546 AbsDiff(vform, dst, temp1, temp2, true); | |
2547 return dst; | |
2548 } | |
2549 | |
2550 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst, | |
2551 const LogicVRegister& src1, | |
2552 const LogicVRegister& src2) { | |
2553 SimVRegister temp1, temp2; | |
2554 sxtl2(vform, temp1, src1); | |
2555 sxtl2(vform, temp2, src2); | |
2556 AbsDiff(vform, dst, temp1, temp2, true); | |
2557 return dst; | |
2558 } | |
2559 | |
2560 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, | |
2561 const LogicVRegister& src1, | |
2562 const LogicVRegister& src2) { | |
2563 SimVRegister temp1, temp2; | |
2564 uxtl(vform, temp1, src1); | |
2565 uxtl(vform, temp2, src2); | |
2566 mul(vform, dst, temp1, temp2); | |
2567 return dst; | |
2568 } | |
2569 | |
2570 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, | |
2571 const LogicVRegister& src1, | |
2572 const LogicVRegister& src2) { | |
2573 SimVRegister temp1, temp2; | |
2574 uxtl2(vform, temp1, src1); | |
2575 uxtl2(vform, temp2, src2); | |
2576 mul(vform, dst, temp1, temp2); | |
2577 return dst; | |
2578 } | |
2579 | |
2580 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, | |
2581 const LogicVRegister& src1, | |
2582 const LogicVRegister& src2) { | |
2583 SimVRegister temp1, temp2; | |
2584 sxtl(vform, temp1, src1); | |
2585 sxtl(vform, temp2, src2); | |
2586 mul(vform, dst, temp1, temp2); | |
2587 return dst; | |
2588 } | |
2589 | |
2590 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, | |
2591 const LogicVRegister& src1, | |
2592 const LogicVRegister& src2) { | |
2593 SimVRegister temp1, temp2; | |
2594 sxtl2(vform, temp1, src1); | |
2595 sxtl2(vform, temp2, src2); | |
2596 mul(vform, dst, temp1, temp2); | |
2597 return dst; | |
2598 } | |
2599 | |
2600 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, | |
2601 const LogicVRegister& src1, | |
2602 const LogicVRegister& src2) { | |
2603 SimVRegister temp1, temp2; | |
2604 uxtl(vform, temp1, src1); | |
2605 uxtl(vform, temp2, src2); | |
2606 mls(vform, dst, temp1, temp2); | |
2607 return dst; | |
2608 } | |
2609 | |
2610 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, | |
2611 const LogicVRegister& src1, | |
2612 const LogicVRegister& src2) { | |
2613 SimVRegister temp1, temp2; | |
2614 uxtl2(vform, temp1, src1); | |
2615 uxtl2(vform, temp2, src2); | |
2616 mls(vform, dst, temp1, temp2); | |
2617 return dst; | |
2618 } | |
2619 | |
2620 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, | |
2621 const LogicVRegister& src1, | |
2622 const LogicVRegister& src2) { | |
2623 SimVRegister temp1, temp2; | |
2624 sxtl(vform, temp1, src1); | |
2625 sxtl(vform, temp2, src2); | |
2626 mls(vform, dst, temp1, temp2); | |
2627 return dst; | |
2628 } | |
2629 | |
2630 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, | |
2631 const LogicVRegister& src1, | |
2632 const LogicVRegister& src2) { | |
2633 SimVRegister temp1, temp2; | |
2634 sxtl2(vform, temp1, src1); | |
2635 sxtl2(vform, temp2, src2); | |
2636 mls(vform, dst, temp1, temp2); | |
2637 return dst; | |
2638 } | |
2639 | |
2640 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, | |
2641 const LogicVRegister& src1, | |
2642 const LogicVRegister& src2) { | |
2643 SimVRegister temp1, temp2; | |
2644 uxtl(vform, temp1, src1); | |
2645 uxtl(vform, temp2, src2); | |
2646 mla(vform, dst, temp1, temp2); | |
2647 return dst; | |
2648 } | |
2649 | |
2650 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, | |
2651 const LogicVRegister& src1, | |
2652 const LogicVRegister& src2) { | |
2653 SimVRegister temp1, temp2; | |
2654 uxtl2(vform, temp1, src1); | |
2655 uxtl2(vform, temp2, src2); | |
2656 mla(vform, dst, temp1, temp2); | |
2657 return dst; | |
2658 } | |
2659 | |
2660 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, | |
2661 const LogicVRegister& src1, | |
2662 const LogicVRegister& src2) { | |
2663 SimVRegister temp1, temp2; | |
2664 sxtl(vform, temp1, src1); | |
2665 sxtl(vform, temp2, src2); | |
2666 mla(vform, dst, temp1, temp2); | |
2667 return dst; | |
2668 } | |
2669 | |
2670 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, | |
2671 const LogicVRegister& src1, | |
2672 const LogicVRegister& src2) { | |
2673 SimVRegister temp1, temp2; | |
2674 sxtl2(vform, temp1, src1); | |
2675 sxtl2(vform, temp2, src2); | |
2676 mla(vform, dst, temp1, temp2); | |
2677 return dst; | |
2678 } | |
2679 | |
2680 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, | |
2681 const LogicVRegister& src1, | |
2682 const LogicVRegister& src2) { | |
2683 SimVRegister temp; | |
2684 LogicVRegister product = sqdmull(vform, temp, src1, src2); | |
2685 return add(vform, dst, dst, product).SignedSaturate(vform); | |
2686 } | |
2687 | |
2688 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, | |
2689 const LogicVRegister& src1, | |
2690 const LogicVRegister& src2) { | |
2691 SimVRegister temp; | |
2692 LogicVRegister product = sqdmull2(vform, temp, src1, src2); | |
2693 return add(vform, dst, dst, product).SignedSaturate(vform); | |
2694 } | |
2695 | |
2696 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, | |
2697 const LogicVRegister& src1, | |
2698 const LogicVRegister& src2) { | |
2699 SimVRegister temp; | |
2700 LogicVRegister product = sqdmull(vform, temp, src1, src2); | |
2701 return sub(vform, dst, dst, product).SignedSaturate(vform); | |
2702 } | |
2703 | |
2704 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, | |
2705 const LogicVRegister& src1, | |
2706 const LogicVRegister& src2) { | |
2707 SimVRegister temp; | |
2708 LogicVRegister product = sqdmull2(vform, temp, src1, src2); | |
2709 return sub(vform, dst, dst, product).SignedSaturate(vform); | |
2710 } | |
2711 | |
2712 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, | |
2713 const LogicVRegister& src1, | |
2714 const LogicVRegister& src2) { | |
2715 SimVRegister temp; | |
2716 LogicVRegister product = smull(vform, temp, src1, src2); | |
2717 return add(vform, dst, product, product).SignedSaturate(vform); | |
2718 } | |
2719 | |
2720 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, | |
2721 const LogicVRegister& src1, | |
2722 const LogicVRegister& src2) { | |
2723 SimVRegister temp; | |
2724 LogicVRegister product = smull2(vform, temp, src1, src2); | |
2725 return add(vform, dst, product, product).SignedSaturate(vform); | |
2726 } | |
2727 | |
2728 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, | |
2729 const LogicVRegister& src1, | |
2730 const LogicVRegister& src2, bool round) { | |
2731 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. | |
2732 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) | |
2733 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. | |
2734 | |
2735 int esize = LaneSizeInBitsFromFormat(vform); | |
2736 int round_const = round ? (1 << (esize - 2)) : 0; | |
2737 int64_t product; | |
2738 | |
2739 dst.ClearForWrite(vform); | |
2740 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2741 product = src1.Int(vform, i) * src2.Int(vform, i); | |
2742 product += round_const; | |
2743 product = product >> (esize - 1); | |
2744 | |
2745 if (product > MaxIntFromFormat(vform)) { | |
2746 product = MaxIntFromFormat(vform); | |
2747 } else if (product < MinIntFromFormat(vform)) { | |
2748 product = MinIntFromFormat(vform); | |
2749 } | |
2750 dst.SetInt(vform, i, product); | |
2751 } | |
2752 return dst; | |
2753 } | |
2754 | |
2755 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, | |
2756 const LogicVRegister& src1, | |
2757 const LogicVRegister& src2) { | |
2758 return sqrdmulh(vform, dst, src1, src2, false); | |
2759 } | |
2760 | |
2761 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst, | |
2762 const LogicVRegister& src1, | |
2763 const LogicVRegister& src2) { | |
2764 SimVRegister temp; | |
2765 add(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2766 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2767 return dst; | |
2768 } | |
2769 | |
2770 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst, | |
2771 const LogicVRegister& src1, | |
2772 const LogicVRegister& src2) { | |
2773 SimVRegister temp; | |
2774 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2775 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2776 return dst; | |
2777 } | |
2778 | |
2779 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst, | |
2780 const LogicVRegister& src1, | |
2781 const LogicVRegister& src2) { | |
2782 SimVRegister temp; | |
2783 add(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2784 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2785 return dst; | |
2786 } | |
2787 | |
2788 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst, | |
2789 const LogicVRegister& src1, | |
2790 const LogicVRegister& src2) { | |
2791 SimVRegister temp; | |
2792 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2793 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2794 return dst; | |
2795 } | |
2796 | |
2797 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst, | |
2798 const LogicVRegister& src1, | |
2799 const LogicVRegister& src2) { | |
2800 SimVRegister temp; | |
2801 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2802 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2803 return dst; | |
2804 } | |
2805 | |
2806 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst, | |
2807 const LogicVRegister& src1, | |
2808 const LogicVRegister& src2) { | |
2809 SimVRegister temp; | |
2810 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2811 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2812 return dst; | |
2813 } | |
2814 | |
2815 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst, | |
2816 const LogicVRegister& src1, | |
2817 const LogicVRegister& src2) { | |
2818 SimVRegister temp; | |
2819 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2820 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2821 return dst; | |
2822 } | |
2823 | |
2824 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst, | |
2825 const LogicVRegister& src1, | |
2826 const LogicVRegister& src2) { | |
2827 SimVRegister temp; | |
2828 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2829 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2830 return dst; | |
2831 } | |
2832 | |
2833 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, | |
2834 const LogicVRegister& src1, | |
2835 const LogicVRegister& src2) { | |
2836 uint64_t result[16]; | |
2837 int laneCount = LaneCountFromFormat(vform); | |
2838 int pairs = laneCount / 2; | |
2839 for (int i = 0; i < pairs; ++i) { | |
2840 result[2 * i] = src1.Uint(vform, 2 * i); | |
2841 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); | |
2842 } | |
2843 | |
2844 dst.SetUintArray(vform, result); | |
2845 return dst; | |
2846 } | |
2847 | |
2848 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, | |
2849 const LogicVRegister& src1, | |
2850 const LogicVRegister& src2) { | |
2851 uint64_t result[16]; | |
2852 int laneCount = LaneCountFromFormat(vform); | |
2853 int pairs = laneCount / 2; | |
2854 for (int i = 0; i < pairs; ++i) { | |
2855 result[2 * i] = src1.Uint(vform, (2 * i) + 1); | |
2856 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); | |
2857 } | |
2858 | |
2859 dst.SetUintArray(vform, result); | |
2860 return dst; | |
2861 } | |
2862 | |
2863 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, | |
2864 const LogicVRegister& src1, | |
2865 const LogicVRegister& src2) { | |
2866 uint64_t result[16]; | |
2867 int laneCount = LaneCountFromFormat(vform); | |
2868 int pairs = laneCount / 2; | |
2869 for (int i = 0; i < pairs; ++i) { | |
2870 result[2 * i] = src1.Uint(vform, i); | |
2871 result[(2 * i) + 1] = src2.Uint(vform, i); | |
2872 } | |
2873 | |
2874 dst.SetUintArray(vform, result); | |
2875 return dst; | |
2876 } | |
2877 | |
2878 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, | |
2879 const LogicVRegister& src1, | |
2880 const LogicVRegister& src2) { | |
2881 uint64_t result[16]; | |
2882 int laneCount = LaneCountFromFormat(vform); | |
2883 int pairs = laneCount / 2; | |
2884 for (int i = 0; i < pairs; ++i) { | |
2885 result[2 * i] = src1.Uint(vform, pairs + i); | |
2886 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); | |
2887 } | |
2888 | |
2889 dst.SetUintArray(vform, result); | |
2890 return dst; | |
2891 } | |
2892 | |
2893 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, | |
2894 const LogicVRegister& src1, | |
2895 const LogicVRegister& src2) { | |
2896 uint64_t result[32]; | |
2897 int laneCount = LaneCountFromFormat(vform); | |
2898 for (int i = 0; i < laneCount; ++i) { | |
2899 result[i] = src1.Uint(vform, i); | |
2900 result[laneCount + i] = src2.Uint(vform, i); | |
2901 } | |
2902 | |
2903 dst.ClearForWrite(vform); | |
2904 for (int i = 0; i < laneCount; ++i) { | |
2905 dst.SetUint(vform, i, result[2 * i]); | |
2906 } | |
2907 return dst; | |
2908 } | |
2909 | |
2910 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, | |
2911 const LogicVRegister& src1, | |
2912 const LogicVRegister& src2) { | |
2913 uint64_t result[32]; | |
2914 int laneCount = LaneCountFromFormat(vform); | |
2915 for (int i = 0; i < laneCount; ++i) { | |
2916 result[i] = src1.Uint(vform, i); | |
2917 result[laneCount + i] = src2.Uint(vform, i); | |
2918 } | |
2919 | |
2920 dst.ClearForWrite(vform); | |
2921 for (int i = 0; i < laneCount; ++i) { | |
2922 dst.SetUint(vform, i, result[(2 * i) + 1]); | |
2923 } | |
2924 return dst; | |
2925 } | |
2926 | |
2927 template <typename T> | |
2928 T Simulator::FPAdd(T op1, T op2) { | |
2929 T result = FPProcessNaNs(op1, op2); | |
2930 if (std::isnan(result)) return result; | |
2931 | |
2932 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { | |
2933 // inf + -inf returns the default NaN. | |
2934 FPProcessException(); | |
2935 return FPDefaultNaN<T>(); | |
2936 } else { | |
2937 // Other cases should be handled by standard arithmetic. | |
2938 return op1 + op2; | |
2939 } | |
2940 } | |
2941 | |
2942 template <typename T> | |
2943 T Simulator::FPSub(T op1, T op2) { | |
2944 // NaNs should be handled elsewhere. | |
2945 DCHECK(!std::isnan(op1) && !std::isnan(op2)); | |
2946 | |
2947 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { | |
2948 // inf - inf returns the default NaN. | |
2949 FPProcessException(); | |
2950 return FPDefaultNaN<T>(); | |
2951 } else { | |
2952 // Other cases should be handled by standard arithmetic. | |
2953 return op1 - op2; | |
2954 } | |
2955 } | |
2956 | |
2957 template <typename T> | |
2958 T Simulator::FPMul(T op1, T op2) { | |
2959 // NaNs should be handled elsewhere. | |
2960 DCHECK(!std::isnan(op1) && !std::isnan(op2)); | |
2961 | |
2962 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { | |
2963 // inf * 0.0 returns the default NaN. | |
2964 FPProcessException(); | |
2965 return FPDefaultNaN<T>(); | |
2966 } else { | |
2967 // Other cases should be handled by standard arithmetic. | |
2968 return op1 * op2; | |
2969 } | |
2970 } | |
2971 | |
2972 template <typename T> | |
2973 T Simulator::FPMulx(T op1, T op2) { | |
2974 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { | |
2975 // inf * 0.0 returns +/-2.0. | |
2976 T two = 2.0; | |
2977 return copysign(1.0, op1) * copysign(1.0, op2) * two; | |
2978 } | |
2979 return FPMul(op1, op2); | |
2980 } | |
2981 | |
2982 template <typename T> | |
2983 T Simulator::FPMulAdd(T a, T op1, T op2) { | |
2984 T result = FPProcessNaNs3(a, op1, op2); | |
2985 | |
2986 T sign_a = copysign(1.0, a); | |
2987 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); | |
2988 bool isinf_prod = std::isinf(op1) || std::isinf(op2); | |
2989 bool operation_generates_nan = | |
2990 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 | |
2991 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf | |
2992 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf | |
2993 | |
2994 if (std::isnan(result)) { | |
2995 // Generated NaNs override quiet NaNs propagated from a. | |
2996 if (operation_generates_nan && IsQuietNaN(a)) { | |
2997 FPProcessException(); | |
2998 return FPDefaultNaN<T>(); | |
2999 } else { | |
3000 return result; | |
3001 } | |
3002 } | |
3003 | |
3004 // If the operation would produce a NaN, return the default NaN. | |
3005 if (operation_generates_nan) { | |
3006 FPProcessException(); | |
3007 return FPDefaultNaN<T>(); | |
3008 } | |
3009 | |
3010 // Work around broken fma implementations for exact zero results: The sign of | |
3011 // exact 0.0 results is positive unless both a and op1 * op2 are negative. | |
3012 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { | |
3013 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; | |
3014 } | |
3015 | |
3016 result = FusedMultiplyAdd(op1, op2, a); | |
3017 DCHECK(!std::isnan(result)); | |
3018 | |
3019 // Work around broken fma implementations for rounded zero results: If a is | |
3020 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. | |
3021 if ((a == 0.0) && (result == 0.0)) { | |
3022 return copysign(0.0, sign_prod); | |
3023 } | |
3024 | |
3025 return result; | |
3026 } | |
3027 | |
3028 template <typename T> | |
3029 T Simulator::FPDiv(T op1, T op2) { | |
3030 // NaNs should be handled elsewhere. | |
3031 DCHECK(!std::isnan(op1) && !std::isnan(op2)); | |
3032 | |
3033 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { | |
3034 // inf / inf and 0.0 / 0.0 return the default NaN. | |
3035 FPProcessException(); | |
3036 return FPDefaultNaN<T>(); | |
3037 } else { | |
3038 if (op2 == 0.0) { | |
3039 FPProcessException(); | |
3040 if (!std::isnan(op1)) { | |
3041 double op1_sign = copysign(1.0, op1); | |
3042 double op2_sign = copysign(1.0, op2); | |
3043 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity); | |
3044 } | |
3045 } | |
3046 | |
3047 // Other cases should be handled by standard arithmetic. | |
3048 return op1 / op2; | |
3049 } | |
3050 } | |
3051 | |
3052 template <typename T> | |
3053 T Simulator::FPSqrt(T op) { | |
3054 if (std::isnan(op)) { | |
3055 return FPProcessNaN(op); | |
3056 } else if (op < 0.0) { | |
3057 FPProcessException(); | |
3058 return FPDefaultNaN<T>(); | |
3059 } else { | |
3060 return sqrt(op); | |
3061 } | |
3062 } | |
3063 | |
3064 template <typename T> | |
3065 T Simulator::FPMax(T a, T b) { | |
3066 T result = FPProcessNaNs(a, b); | |
3067 if (std::isnan(result)) return result; | |
3068 | |
3069 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { | |
3070 // a and b are zero, and the sign differs: return +0.0. | |
3071 return 0.0; | |
3072 } else { | |
3073 return (a > b) ? a : b; | |
3074 } | |
3075 } | |
3076 | |
3077 template <typename T> | |
3078 T Simulator::FPMaxNM(T a, T b) { | |
3079 if (IsQuietNaN(a) && !IsQuietNaN(b)) { | |
3080 a = kFP64NegativeInfinity; | |
3081 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { | |
3082 b = kFP64NegativeInfinity; | |
3083 } | |
3084 | |
3085 T result = FPProcessNaNs(a, b); | |
3086 return std::isnan(result) ? result : FPMax(a, b); | |
3087 } | |
3088 | |
3089 template <typename T> | |
3090 T Simulator::FPMin(T a, T b) { | |
3091 T result = FPProcessNaNs(a, b); | |
3092 if (std::isnan(result)) return result; | |
3093 | |
3094 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { | |
3095 // a and b are zero, and the sign differs: return -0.0. | |
3096 return -0.0; | |
3097 } else { | |
3098 return (a < b) ? a : b; | |
3099 } | |
3100 } | |
3101 | |
3102 template <typename T> | |
3103 T Simulator::FPMinNM(T a, T b) { | |
3104 if (IsQuietNaN(a) && !IsQuietNaN(b)) { | |
3105 a = kFP64PositiveInfinity; | |
3106 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { | |
3107 b = kFP64PositiveInfinity; | |
3108 } | |
3109 | |
3110 T result = FPProcessNaNs(a, b); | |
3111 return std::isnan(result) ? result : FPMin(a, b); | |
3112 } | |
3113 | |
3114 template <typename T> | |
3115 T Simulator::FPRecipStepFused(T op1, T op2) { | |
3116 const T two = 2.0; | |
3117 if ((std::isinf(op1) && (op2 == 0.0)) || | |
3118 ((op1 == 0.0) && (std::isinf(op2)))) { | |
3119 return two; | |
3120 } else if (std::isinf(op1) || std::isinf(op2)) { | |
3121 // Return +inf if signs match, otherwise -inf. | |
3122 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity | |
3123 : kFP64NegativeInfinity; | |
3124 } else { | |
3125 return FusedMultiplyAdd(op1, op2, two); | |
3126 } | |
3127 } | |
3128 | |
3129 template <typename T> | |
3130 T Simulator::FPRSqrtStepFused(T op1, T op2) { | |
3131 const T one_point_five = 1.5; | |
3132 const T two = 2.0; | |
3133 | |
3134 if ((std::isinf(op1) && (op2 == 0.0)) || | |
3135 ((op1 == 0.0) && (std::isinf(op2)))) { | |
3136 return one_point_five; | |
3137 } else if (std::isinf(op1) || std::isinf(op2)) { | |
3138 // Return +inf if signs match, otherwise -inf. | |
3139 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity | |
3140 : kFP64NegativeInfinity; | |
3141 } else { | |
3142 // The multiply-add-halve operation must be fully fused, so avoid interim | |
3143 // rounding by checking which operand can be losslessly divided by two | |
3144 // before doing the multiply-add. | |
3145 if (std::isnormal(op1 / two)) { | |
3146 return FusedMultiplyAdd(op1 / two, op2, one_point_five); | |
3147 } else if (std::isnormal(op2 / two)) { | |
3148 return FusedMultiplyAdd(op1, op2 / two, one_point_five); | |
3149 } else { | |
3150 // Neither operand is normal after halving: the result is dominated by | |
3151 // the addition term, so just return that. | |
3152 return one_point_five; | |
3153 } | |
3154 } | |
3155 } | |
3156 | |
3157 double Simulator::FPRoundInt(double value, FPRounding round_mode) { | |
3158 if ((value == 0.0) || (value == kFP64PositiveInfinity) || | |
3159 (value == kFP64NegativeInfinity)) { | |
3160 return value; | |
3161 } else if (std::isnan(value)) { | |
3162 return FPProcessNaN(value); | |
3163 } | |
3164 | |
3165 double int_result = std::floor(value); | |
3166 double error = value - int_result; | |
3167 switch (round_mode) { | |
3168 case FPTieAway: { | |
3169 // Take care of correctly handling the range ]-0.5, -0.0], which must | |
3170 // yield -0.0. | |
3171 if ((-0.5 < value) && (value < 0.0)) { | |
3172 int_result = -0.0; | |
3173 | |
3174 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { | |
3175 // If the error is greater than 0.5, or is equal to 0.5 and the integer | |
3176 // result is positive, round up. | |
3177 int_result++; | |
3178 } | |
3179 break; | |
3180 } | |
3181 case FPTieEven: { | |
3182 // Take care of correctly handling the range [-0.5, -0.0], which must | |
3183 // yield -0.0. | |
3184 if ((-0.5 <= value) && (value < 0.0)) { | |
3185 int_result = -0.0; | |
3186 | |
3187 // If the error is greater than 0.5, or is equal to 0.5 and the integer | |
3188 // result is odd, round up. | |
3189 } else if ((error > 0.5) || | |
3190 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { | |
3191 int_result++; | |
3192 } | |
3193 break; | |
3194 } | |
3195 case FPZero: { | |
3196 // If value>0 then we take floor(value) | |
3197 // otherwise, ceil(value). | |
3198 if (value < 0) { | |
3199 int_result = ceil(value); | |
3200 } | |
3201 break; | |
3202 } | |
3203 case FPNegativeInfinity: { | |
3204 // We always use floor(value). | |
3205 break; | |
3206 } | |
3207 case FPPositiveInfinity: { | |
3208 // Take care of correctly handling the range ]-1.0, -0.0], which must | |
3209 // yield -0.0. | |
3210 if ((-1.0 < value) && (value < 0.0)) { | |
3211 int_result = -0.0; | |
3212 | |
3213 // If the error is non-zero, round up. | |
3214 } else if (error > 0.0) { | |
3215 int_result++; | |
3216 } | |
3217 break; | |
3218 } | |
3219 default: | |
3220 UNIMPLEMENTED(); | |
3221 } | |
3222 return int_result; | |
3223 } | |
3224 | |
3225 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { | |
3226 value = FPRoundInt(value, rmode); | |
3227 if (value >= kWMaxInt) { | |
3228 return kWMaxInt; | |
3229 } else if (value < kWMinInt) { | |
3230 return kWMinInt; | |
3231 } | |
3232 return std::isnan(value) ? 0 : static_cast<int32_t>(value); | |
3233 } | |
3234 | |
3235 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { | |
3236 value = FPRoundInt(value, rmode); | |
3237 if (value >= kXMaxInt) { | |
3238 return kXMaxInt; | |
3239 } else if (value < kXMinInt) { | |
3240 return kXMinInt; | |
3241 } | |
3242 return std::isnan(value) ? 0 : static_cast<int64_t>(value); | |
3243 } | |
3244 | |
3245 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { | |
3246 value = FPRoundInt(value, rmode); | |
3247 if (value >= kWMaxUInt) { | |
3248 return kWMaxUInt; | |
3249 } else if (value < 0.0) { | |
3250 return 0; | |
3251 } | |
3252 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); | |
3253 } | |
3254 | |
3255 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { | |
3256 value = FPRoundInt(value, rmode); | |
3257 if (value >= kXMaxUInt) { | |
3258 return kXMaxUInt; | |
3259 } else if (value < 0.0) { | |
3260 return 0; | |
3261 } | |
3262 return std::isnan(value) ? 0 : static_cast<uint64_t>(value); | |
3263 } | |
3264 | |
3265 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ | |
3266 template <typename T> \ | |
3267 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \ | |
3268 const LogicVRegister& src1, \ | |
3269 const LogicVRegister& src2) { \ | |
3270 dst.ClearForWrite(vform); \ | |
3271 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ | |
3272 T op1 = src1.Float<T>(i); \ | |
3273 T op2 = src2.Float<T>(i); \ | |
3274 T result; \ | |
3275 if (PROCNAN) { \ | |
3276 result = FPProcessNaNs(op1, op2); \ | |
3277 if (!std::isnan(result)) { \ | |
3278 result = OP(op1, op2); \ | |
3279 } \ | |
3280 } else { \ | |
3281 result = OP(op1, op2); \ | |
3282 } \ | |
3283 dst.SetFloat(i, result); \ | |
3284 } \ | |
3285 return dst; \ | |
3286 } \ | |
3287 \ | |
3288 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \ | |
3289 const LogicVRegister& src1, \ | |
3290 const LogicVRegister& src2) { \ | |
3291 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \ | |
3292 FN<float>(vform, dst, src1, src2); \ | |
3293 } else { \ | |
3294 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \ | |
3295 FN<double>(vform, dst, src1, src2); \ | |
3296 } \ | |
3297 return dst; \ | |
3298 } | |
3299 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) | |
3300 #undef DEFINE_NEON_FP_VECTOR_OP | |
3301 | |
3302 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst, | |
3303 const LogicVRegister& src1, | |
3304 const LogicVRegister& src2) { | |
3305 SimVRegister temp; | |
3306 LogicVRegister product = fmul(vform, temp, src1, src2); | |
3307 return fneg(vform, dst, product); | |
3308 } | |
3309 | |
3310 template <typename T> | |
3311 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, | |
3312 const LogicVRegister& src1, | |
3313 const LogicVRegister& src2) { | |
3314 dst.ClearForWrite(vform); | |
3315 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3316 T op1 = -src1.Float<T>(i); | |
3317 T op2 = src2.Float<T>(i); | |
3318 T result = FPProcessNaNs(op1, op2); | |
3319 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2)); | |
3320 } | |
3321 return dst; | |
3322 } | |
3323 | |
3324 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, | |
3325 const LogicVRegister& src1, | |
3326 const LogicVRegister& src2) { | |
3327 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3328 frecps<float>(vform, dst, src1, src2); | |
3329 } else { | |
3330 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3331 frecps<double>(vform, dst, src1, src2); | |
3332 } | |
3333 return dst; | |
3334 } | |
3335 | |
3336 template <typename T> | |
3337 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, | |
3338 const LogicVRegister& src1, | |
3339 const LogicVRegister& src2) { | |
3340 dst.ClearForWrite(vform); | |
3341 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3342 T op1 = -src1.Float<T>(i); | |
3343 T op2 = src2.Float<T>(i); | |
3344 T result = FPProcessNaNs(op1, op2); | |
3345 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); | |
3346 } | |
3347 return dst; | |
3348 } | |
3349 | |
3350 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, | |
3351 const LogicVRegister& src1, | |
3352 const LogicVRegister& src2) { | |
3353 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3354 frsqrts<float>(vform, dst, src1, src2); | |
3355 } else { | |
3356 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3357 frsqrts<double>(vform, dst, src1, src2); | |
3358 } | |
3359 return dst; | |
3360 } | |
3361 | |
3362 template <typename T> | |
3363 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, | |
3364 const LogicVRegister& src1, | |
3365 const LogicVRegister& src2, Condition cond) { | |
3366 dst.ClearForWrite(vform); | |
3367 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3368 bool result = false; | |
3369 T op1 = src1.Float<T>(i); | |
3370 T op2 = src2.Float<T>(i); | |
3371 T nan_result = FPProcessNaNs(op1, op2); | |
3372 if (!std::isnan(nan_result)) { | |
3373 switch (cond) { | |
3374 case eq: | |
3375 result = (op1 == op2); | |
3376 break; | |
3377 case ge: | |
3378 result = (op1 >= op2); | |
3379 break; | |
3380 case gt: | |
3381 result = (op1 > op2); | |
3382 break; | |
3383 case le: | |
3384 result = (op1 <= op2); | |
3385 break; | |
3386 case lt: | |
3387 result = (op1 < op2); | |
3388 break; | |
3389 default: | |
3390 UNREACHABLE(); | |
3391 break; | |
3392 } | |
3393 } | |
3394 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); | |
3395 } | |
3396 return dst; | |
3397 } | |
3398 | |
3399 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, | |
3400 const LogicVRegister& src1, | |
3401 const LogicVRegister& src2, Condition cond) { | |
3402 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3403 fcmp<float>(vform, dst, src1, src2, cond); | |
3404 } else { | |
3405 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3406 fcmp<double>(vform, dst, src1, src2, cond); | |
3407 } | |
3408 return dst; | |
3409 } | |
3410 | |
3411 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst, | |
3412 const LogicVRegister& src, Condition cond) { | |
3413 SimVRegister temp; | |
3414 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3415 LogicVRegister zero_reg = | |
3416 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f)); | |
3417 fcmp<float>(vform, dst, src, zero_reg, cond); | |
3418 } else { | |
3419 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3420 LogicVRegister zero_reg = | |
3421 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0)); | |
3422 fcmp<double>(vform, dst, src, zero_reg, cond); | |
3423 } | |
3424 return dst; | |
3425 } | |
3426 | |
3427 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst, | |
3428 const LogicVRegister& src1, | |
3429 const LogicVRegister& src2, Condition cond) { | |
3430 SimVRegister temp1, temp2; | |
3431 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3432 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); | |
3433 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); | |
3434 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); | |
3435 } else { | |
3436 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3437 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); | |
3438 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); | |
3439 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); | |
3440 } | |
3441 return dst; | |
3442 } | |
3443 | |
3444 template <typename T> | |
3445 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, | |
3446 const LogicVRegister& src1, | |
3447 const LogicVRegister& src2) { | |
3448 dst.ClearForWrite(vform); | |
3449 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3450 T op1 = src1.Float<T>(i); | |
3451 T op2 = src2.Float<T>(i); | |
3452 T acc = dst.Float<T>(i); | |
3453 T result = FPMulAdd(acc, op1, op2); | |
3454 dst.SetFloat(i, result); | |
3455 } | |
3456 return dst; | |
3457 } | |
3458 | |
3459 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, | |
3460 const LogicVRegister& src1, | |
3461 const LogicVRegister& src2) { | |
3462 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3463 fmla<float>(vform, dst, src1, src2); | |
3464 } else { | |
3465 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3466 fmla<double>(vform, dst, src1, src2); | |
3467 } | |
3468 return dst; | |
3469 } | |
3470 | |
3471 template <typename T> | |
3472 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, | |
3473 const LogicVRegister& src1, | |
3474 const LogicVRegister& src2) { | |
3475 dst.ClearForWrite(vform); | |
3476 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3477 T op1 = -src1.Float<T>(i); | |
3478 T op2 = src2.Float<T>(i); | |
3479 T acc = dst.Float<T>(i); | |
3480 T result = FPMulAdd(acc, op1, op2); | |
3481 dst.SetFloat(i, result); | |
3482 } | |
3483 return dst; | |
3484 } | |
3485 | |
3486 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, | |
3487 const LogicVRegister& src1, | |
3488 const LogicVRegister& src2) { | |
3489 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3490 fmls<float>(vform, dst, src1, src2); | |
3491 } else { | |
3492 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3493 fmls<double>(vform, dst, src1, src2); | |
3494 } | |
3495 return dst; | |
3496 } | |
3497 | |
3498 template <typename T> | |
3499 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, | |
3500 const LogicVRegister& src) { | |
3501 dst.ClearForWrite(vform); | |
3502 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3503 T op = src.Float<T>(i); | |
3504 op = -op; | |
3505 dst.SetFloat(i, op); | |
3506 } | |
3507 return dst; | |
3508 } | |
3509 | |
3510 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, | |
3511 const LogicVRegister& src) { | |
3512 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3513 fneg<float>(vform, dst, src); | |
3514 } else { | |
3515 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3516 fneg<double>(vform, dst, src); | |
3517 } | |
3518 return dst; | |
3519 } | |
3520 | |
3521 template <typename T> | |
3522 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, | |
3523 const LogicVRegister& src) { | |
3524 dst.ClearForWrite(vform); | |
3525 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3526 T op = src.Float<T>(i); | |
3527 if (copysign(1.0, op) < 0.0) { | |
3528 op = -op; | |
3529 } | |
3530 dst.SetFloat(i, op); | |
3531 } | |
3532 return dst; | |
3533 } | |
3534 | |
3535 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, | |
3536 const LogicVRegister& src) { | |
3537 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3538 fabs_<float>(vform, dst, src); | |
3539 } else { | |
3540 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3541 fabs_<double>(vform, dst, src); | |
3542 } | |
3543 return dst; | |
3544 } | |
3545 | |
3546 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst, | |
3547 const LogicVRegister& src1, | |
3548 const LogicVRegister& src2) { | |
3549 SimVRegister temp; | |
3550 fsub(vform, temp, src1, src2); | |
3551 fabs_(vform, dst, temp); | |
3552 return dst; | |
3553 } | |
3554 | |
3555 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst, | |
3556 const LogicVRegister& src) { | |
3557 dst.ClearForWrite(vform); | |
3558 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3559 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3560 float result = FPSqrt(src.Float<float>(i)); | |
3561 dst.SetFloat(i, result); | |
3562 } | |
3563 } else { | |
3564 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3565 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3566 double result = FPSqrt(src.Float<double>(i)); | |
3567 dst.SetFloat(i, result); | |
3568 } | |
3569 } | |
3570 return dst; | |
3571 } | |
3572 | |
3573 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ | |
3574 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \ | |
3575 const LogicVRegister& src1, \ | |
3576 const LogicVRegister& src2) { \ | |
3577 SimVRegister temp1, temp2; \ | |
3578 uzp1(vform, temp1, src1, src2); \ | |
3579 uzp2(vform, temp2, src1, src2); \ | |
3580 FN(vform, dst, temp1, temp2); \ | |
3581 return dst; \ | |
3582 } \ | |
3583 \ | |
3584 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \ | |
3585 const LogicVRegister& src) { \ | |
3586 if (vform == kFormatS) { \ | |
3587 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ | |
3588 dst.SetFloat(0, result); \ | |
3589 } else { \ | |
3590 DCHECK_EQ(vform, kFormatD); \ | |
3591 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ | |
3592 dst.SetFloat(0, result); \ | |
3593 } \ | |
3594 dst.ClearForWrite(vform); \ | |
3595 return dst; \ | |
3596 } | |
3597 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) | |
3598 #undef DEFINE_NEON_FP_PAIR_OP | |
3599 | |
3600 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst, | |
3601 const LogicVRegister& src, FPMinMaxOp Op) { | |
3602 DCHECK_EQ(vform, kFormat4S); | |
3603 USE(vform); | |
3604 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); | |
3605 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); | |
3606 float result = (this->*Op)(result1, result2); | |
3607 dst.ClearForWrite(kFormatS); | |
3608 dst.SetFloat<float>(0, result); | |
3609 return dst; | |
3610 } | |
3611 | |
3612 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst, | |
3613 const LogicVRegister& src) { | |
3614 return FMinMaxV(vform, dst, src, &Simulator::FPMax); | |
3615 } | |
3616 | |
3617 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst, | |
3618 const LogicVRegister& src) { | |
3619 return FMinMaxV(vform, dst, src, &Simulator::FPMin); | |
3620 } | |
3621 | |
3622 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst, | |
3623 const LogicVRegister& src) { | |
3624 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM); | |
3625 } | |
3626 | |
3627 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst, | |
3628 const LogicVRegister& src) { | |
3629 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM); | |
3630 } | |
3631 | |
3632 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst, | |
3633 const LogicVRegister& src1, | |
3634 const LogicVRegister& src2, int index) { | |
3635 dst.ClearForWrite(vform); | |
3636 SimVRegister temp; | |
3637 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3638 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3639 fmul<float>(vform, dst, src1, index_reg); | |
3640 } else { | |
3641 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3642 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3643 fmul<double>(vform, dst, src1, index_reg); | |
3644 } | |
3645 return dst; | |
3646 } | |
3647 | |
3648 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, | |
3649 const LogicVRegister& src1, | |
3650 const LogicVRegister& src2, int index) { | |
3651 dst.ClearForWrite(vform); | |
3652 SimVRegister temp; | |
3653 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3654 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3655 fmla<float>(vform, dst, src1, index_reg); | |
3656 } else { | |
3657 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3658 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3659 fmla<double>(vform, dst, src1, index_reg); | |
3660 } | |
3661 return dst; | |
3662 } | |
3663 | |
3664 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, | |
3665 const LogicVRegister& src1, | |
3666 const LogicVRegister& src2, int index) { | |
3667 dst.ClearForWrite(vform); | |
3668 SimVRegister temp; | |
3669 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3670 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3671 fmls<float>(vform, dst, src1, index_reg); | |
3672 } else { | |
3673 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3674 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3675 fmls<double>(vform, dst, src1, index_reg); | |
3676 } | |
3677 return dst; | |
3678 } | |
3679 | |
3680 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst, | |
3681 const LogicVRegister& src1, | |
3682 const LogicVRegister& src2, int index) { | |
3683 dst.ClearForWrite(vform); | |
3684 SimVRegister temp; | |
3685 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3686 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3687 fmulx<float>(vform, dst, src1, index_reg); | |
3688 | |
3689 } else { | |
3690 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3691 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3692 fmulx<double>(vform, dst, src1, index_reg); | |
3693 } | |
3694 return dst; | |
3695 } | |
3696 | |
3697 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst, | |
3698 const LogicVRegister& src, | |
3699 FPRounding rounding_mode, | |
3700 bool inexact_exception) { | |
3701 dst.ClearForWrite(vform); | |
3702 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3703 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3704 float input = src.Float<float>(i); | |
3705 float rounded = FPRoundInt(input, rounding_mode); | |
3706 if (inexact_exception && !std::isnan(input) && (input != rounded)) { | |
3707 FPProcessException(); | |
3708 } | |
3709 dst.SetFloat<float>(i, rounded); | |
3710 } | |
3711 } else { | |
3712 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3713 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3714 double input = src.Float<double>(i); | |
3715 double rounded = FPRoundInt(input, rounding_mode); | |
3716 if (inexact_exception && !std::isnan(input) && (input != rounded)) { | |
3717 FPProcessException(); | |
3718 } | |
3719 dst.SetFloat<double>(i, rounded); | |
3720 } | |
3721 } | |
3722 return dst; | |
3723 } | |
3724 | |
3725 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst, | |
3726 const LogicVRegister& src, | |
3727 FPRounding rounding_mode, int fbits) { | |
3728 dst.ClearForWrite(vform); | |
3729 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3730 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3731 float op = src.Float<float>(i) * std::pow(2.0f, fbits); | |
3732 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); | |
3733 } | |
3734 } else { | |
3735 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3736 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3737 double op = src.Float<double>(i) * std::pow(2.0, fbits); | |
3738 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); | |
3739 } | |
3740 } | |
3741 return dst; | |
3742 } | |
3743 | |
3744 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst, | |
3745 const LogicVRegister& src, | |
3746 FPRounding rounding_mode, int fbits) { | |
3747 dst.ClearForWrite(vform); | |
3748 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3749 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3750 float op = src.Float<float>(i) * std::pow(2.0f, fbits); | |
3751 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); | |
3752 } | |
3753 } else { | |
3754 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3755 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3756 double op = src.Float<double>(i) * std::pow(2.0, fbits); | |
3757 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); | |
3758 } | |
3759 } | |
3760 return dst; | |
3761 } | |
3762 | |
3763 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, | |
3764 const LogicVRegister& src) { | |
3765 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3766 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { | |
3767 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); | |
3768 } | |
3769 } else { | |
3770 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3771 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { | |
3772 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); | |
3773 } | |
3774 } | |
3775 return dst; | |
3776 } | |
3777 | |
3778 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst, | |
3779 const LogicVRegister& src) { | |
3780 int lane_count = LaneCountFromFormat(vform); | |
3781 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3782 for (int i = 0; i < lane_count; i++) { | |
3783 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); | |
3784 } | |
3785 } else { | |
3786 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3787 for (int i = 0; i < lane_count; i++) { | |
3788 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); | |
3789 } | |
3790 } | |
3791 return dst; | |
3792 } | |
3793 | |
3794 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst, | |
3795 const LogicVRegister& src) { | |
3796 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { | |
3797 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3798 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); | |
3799 } | |
3800 } else { | |
3801 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3802 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3803 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); | |
3804 } | |
3805 } | |
3806 return dst; | |
3807 } | |
3808 | |
3809 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst, | |
3810 const LogicVRegister& src) { | |
3811 int lane_count = LaneCountFromFormat(vform) / 2; | |
3812 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { | |
3813 for (int i = lane_count - 1; i >= 0; i--) { | |
3814 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); | |
3815 } | |
3816 } else { | |
3817 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3818 for (int i = lane_count - 1; i >= 0; i--) { | |
3819 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); | |
3820 } | |
3821 } | |
3822 return dst; | |
3823 } | |
3824 | |
3825 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst, | |
3826 const LogicVRegister& src) { | |
3827 dst.ClearForWrite(vform); | |
3828 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3829 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3830 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); | |
3831 } | |
3832 return dst; | |
3833 } | |
3834 | |
3835 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst, | |
3836 const LogicVRegister& src) { | |
3837 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3838 int lane_count = LaneCountFromFormat(vform) / 2; | |
3839 for (int i = lane_count - 1; i >= 0; i--) { | |
3840 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); | |
3841 } | |
3842 return dst; | |
3843 } | |
3844 | |
3845 // Based on reference C function recip_sqrt_estimate from ARM ARM. | |
3846 double Simulator::recip_sqrt_estimate(double a) { | |
3847 int q0, q1, s; | |
3848 double r; | |
3849 if (a < 0.5) { | |
3850 q0 = static_cast<int>(a * 512.0); | |
3851 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); | |
3852 } else { | |
3853 q1 = static_cast<int>(a * 256.0); | |
3854 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); | |
3855 } | |
3856 s = static_cast<int>(256.0 * r + 0.5); | |
3857 return static_cast<double>(s) / 256.0; | |
3858 } | |
3859 | |
3860 namespace { | |
3861 | |
3862 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { | |
3863 return unsigned_bitextract_64(start_bit, end_bit, val); | |
3864 } | |
3865 | |
3866 } // anonymous namespace | |
3867 | |
3868 template <typename T> | |
3869 T Simulator::FPRecipSqrtEstimate(T op) { | |
3870 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, | |
3871 "T must be a float or double"); | |
3872 | |
3873 if (std::isnan(op)) { | |
3874 return FPProcessNaN(op); | |
3875 } else if (op == 0.0) { | |
3876 if (copysign(1.0, op) < 0.0) { | |
3877 return kFP64NegativeInfinity; | |
3878 } else { | |
3879 return kFP64PositiveInfinity; | |
3880 } | |
3881 } else if (copysign(1.0, op) < 0.0) { | |
3882 FPProcessException(); | |
3883 return FPDefaultNaN<T>(); | |
3884 } else if (std::isinf(op)) { | |
3885 return 0.0; | |
3886 } else { | |
3887 uint64_t fraction; | |
3888 int32_t exp, result_exp; | |
3889 | |
3890 if (sizeof(T) == sizeof(float)) { | |
3891 exp = static_cast<int32_t>(float_exp(op)); | |
3892 fraction = float_mantissa(op); | |
3893 fraction <<= 29; | |
3894 } else { | |
3895 exp = static_cast<int32_t>(double_exp(op)); | |
3896 fraction = double_mantissa(op); | |
3897 } | |
3898 | |
3899 if (exp == 0) { | |
3900 while (Bits(fraction, 51, 51) == 0) { | |
3901 fraction = Bits(fraction, 50, 0) << 1; | |
3902 exp -= 1; | |
3903 } | |
3904 fraction = Bits(fraction, 50, 0) << 1; | |
3905 } | |
3906 | |
3907 double scaled; | |
3908 if (Bits(exp, 0, 0) == 0) { | |
3909 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); | |
3910 } else { | |
3911 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44); | |
3912 } | |
3913 | |
3914 if (sizeof(T) == sizeof(float)) { | |
3915 result_exp = (380 - exp) / 2; | |
3916 } else { | |
3917 result_exp = (3068 - exp) / 2; | |
3918 } | |
3919 | |
3920 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled)); | |
3921 | |
3922 if (sizeof(T) == sizeof(float)) { | |
3923 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); | |
3924 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); | |
3925 return float_pack(0, exp_bits, est_bits); | |
3926 } else { | |
3927 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); | |
3928 } | |
3929 } | |
3930 } | |
3931 | |
3932 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst, | |
3933 const LogicVRegister& src) { | |
3934 dst.ClearForWrite(vform); | |
3935 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3936 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3937 float input = src.Float<float>(i); | |
3938 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); | |
3939 } | |
3940 } else { | |
3941 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3942 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3943 double input = src.Float<double>(i); | |
3944 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); | |
3945 } | |
3946 } | |
3947 return dst; | |
3948 } | |
3949 | |
3950 template <typename T> | |
3951 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { | |
3952 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, | |
3953 "T must be a float or double"); | |
3954 uint32_t sign; | |
3955 | |
3956 if (sizeof(T) == sizeof(float)) { | |
3957 sign = float_sign(op); | |
3958 } else { | |
3959 sign = double_sign(op); | |
3960 } | |
3961 | |
3962 if (std::isnan(op)) { | |
3963 return FPProcessNaN(op); | |
3964 } else if (std::isinf(op)) { | |
3965 return (sign == 1) ? -0.0 : 0.0; | |
3966 } else if (op == 0.0) { | |
3967 FPProcessException(); // FPExc_DivideByZero exception. | |
3968 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; | |
3969 } else if (((sizeof(T) == sizeof(float)) && | |
3970 (std::fabs(op) < std::pow(2.0, -128.0))) || | |
3971 ((sizeof(T) == sizeof(double)) && | |
3972 (std::fabs(op) < std::pow(2.0, -1024.0)))) { | |
3973 bool overflow_to_inf = false; | |
3974 switch (rounding) { | |
3975 case FPTieEven: | |
3976 overflow_to_inf = true; | |
3977 break; | |
3978 case FPPositiveInfinity: | |
3979 overflow_to_inf = (sign == 0); | |
3980 break; | |
3981 case FPNegativeInfinity: | |
3982 overflow_to_inf = (sign == 1); | |
3983 break; | |
3984 case FPZero: | |
3985 overflow_to_inf = false; | |
3986 break; | |
3987 default: | |
3988 break; | |
3989 } | |
3990 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. | |
3991 if (overflow_to_inf) { | |
3992 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; | |
3993 } else { | |
3994 // Return FPMaxNormal(sign). | |
3995 if (sizeof(T) == sizeof(float)) { | |
3996 return float_pack(sign, 0xfe, 0x07fffff); | |
3997 } else { | |
3998 return double_pack(sign, 0x7fe, 0x0fffffffffffffl); | |
3999 } | |
4000 } | |
4001 } else { | |
4002 uint64_t fraction; | |
4003 int32_t exp, result_exp; | |
4004 uint32_t sign; | |
4005 | |
4006 if (sizeof(T) == sizeof(float)) { | |
4007 sign = float_sign(op); | |
4008 exp = static_cast<int32_t>(float_exp(op)); | |
4009 fraction = float_mantissa(op); | |
4010 fraction <<= 29; | |
4011 } else { | |
4012 sign = double_sign(op); | |
4013 exp = static_cast<int32_t>(double_exp(op)); | |
4014 fraction = double_mantissa(op); | |
4015 } | |
4016 | |
4017 if (exp == 0) { | |
4018 if (Bits(fraction, 51, 51) == 0) { | |
4019 exp -= 1; | |
4020 fraction = Bits(fraction, 49, 0) << 2; | |
4021 } else { | |
4022 fraction = Bits(fraction, 50, 0) << 1; | |
4023 } | |
4024 } | |
4025 | |
4026 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); | |
4027 | |
4028 if (sizeof(T) == sizeof(float)) { | |
4029 result_exp = 253 - exp; | |
4030 } else { | |
4031 result_exp = 2045 - exp; | |
4032 } | |
4033 | |
4034 double estimate = recip_estimate(scaled); | |
4035 | |
4036 fraction = double_mantissa(estimate); | |
4037 if (result_exp == 0) { | |
4038 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); | |
4039 } else if (result_exp == -1) { | |
4040 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); | |
4041 result_exp = 0; | |
4042 } | |
4043 if (sizeof(T) == sizeof(float)) { | |
4044 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); | |
4045 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); | |
4046 return float_pack(sign, exp_bits, frac_bits); | |
4047 } else { | |
4048 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); | |
4049 } | |
4050 } | |
4051 } | |
4052 | |
4053 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst, | |
4054 const LogicVRegister& src, FPRounding round) { | |
4055 dst.ClearForWrite(vform); | |
4056 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4057 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4058 float input = src.Float<float>(i); | |
4059 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); | |
4060 } | |
4061 } else { | |
4062 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4063 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4064 double input = src.Float<double>(i); | |
4065 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); | |
4066 } | |
4067 } | |
4068 return dst; | |
4069 } | |
4070 | |
4071 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst, | |
4072 const LogicVRegister& src) { | |
4073 dst.ClearForWrite(vform); | |
4074 uint64_t operand; | |
4075 uint32_t result; | |
4076 double dp_operand, dp_result; | |
4077 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4078 operand = src.Uint(vform, i); | |
4079 if (operand <= 0x3FFFFFFF) { | |
4080 result = 0xFFFFFFFF; | |
4081 } else { | |
4082 dp_operand = operand * std::pow(2.0, -32); | |
4083 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); | |
4084 result = static_cast<uint32_t>(dp_result); | |
4085 } | |
4086 dst.SetUint(vform, i, result); | |
4087 } | |
4088 return dst; | |
4089 } | |
4090 | |
4091 // Based on reference C function recip_estimate from ARM ARM. | |
4092 double Simulator::recip_estimate(double a) { | |
4093 int q, s; | |
4094 double r; | |
4095 q = static_cast<int>(a * 512.0); | |
4096 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); | |
4097 s = static_cast<int>(256.0 * r + 0.5); | |
4098 return static_cast<double>(s) / 256.0; | |
4099 } | |
4100 | |
4101 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst, | |
4102 const LogicVRegister& src) { | |
4103 dst.ClearForWrite(vform); | |
4104 uint64_t operand; | |
4105 uint32_t result; | |
4106 double dp_operand, dp_result; | |
4107 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4108 operand = src.Uint(vform, i); | |
4109 if (operand <= 0x7FFFFFFF) { | |
4110 result = 0xFFFFFFFF; | |
4111 } else { | |
4112 dp_operand = operand * std::pow(2.0, -32); | |
4113 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); | |
4114 result = static_cast<uint32_t>(dp_result); | |
4115 } | |
4116 dst.SetUint(vform, i, result); | |
4117 } | |
4118 return dst; | |
4119 } | |
4120 | |
4121 template <typename T> | |
4122 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, | |
4123 const LogicVRegister& src) { | |
4124 dst.ClearForWrite(vform); | |
4125 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4126 T op = src.Float<T>(i); | |
4127 T result; | |
4128 if (std::isnan(op)) { | |
4129 result = FPProcessNaN(op); | |
4130 } else { | |
4131 int exp; | |
4132 uint32_t sign; | |
4133 if (sizeof(T) == sizeof(float)) { | |
4134 sign = float_sign(op); | |
4135 exp = static_cast<int>(float_exp(op)); | |
4136 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); | |
4137 result = float_pack(sign, exp, 0); | |
4138 } else { | |
4139 sign = double_sign(op); | |
4140 exp = static_cast<int>(double_exp(op)); | |
4141 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); | |
4142 result = double_pack(sign, exp, 0); | |
4143 } | |
4144 } | |
4145 dst.SetFloat(i, result); | |
4146 } | |
4147 return dst; | |
4148 } | |
4149 | |
4150 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, | |
4151 const LogicVRegister& src) { | |
4152 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4153 frecpx<float>(vform, dst, src); | |
4154 } else { | |
4155 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4156 frecpx<double>(vform, dst, src); | |
4157 } | |
4158 return dst; | |
4159 } | |
4160 | |
4161 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, | |
4162 const LogicVRegister& src, int fbits, | |
4163 FPRounding round) { | |
4164 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4165 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4166 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); | |
4167 dst.SetFloat<float>(i, result); | |
4168 } else { | |
4169 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4170 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); | |
4171 dst.SetFloat<double>(i, result); | |
4172 } | |
4173 } | |
4174 return dst; | |
4175 } | |
4176 | |
4177 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, | |
4178 const LogicVRegister& src, int fbits, | |
4179 FPRounding round) { | |
4180 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4181 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4182 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); | |
4183 dst.SetFloat<float>(i, result); | |
4184 } else { | |
4185 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4186 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); | |
4187 dst.SetFloat<double>(i, result); | |
4188 } | |
4189 } | |
4190 return dst; | |
4191 } | |
4192 | |
4193 #endif // USE_SIMULATOR | |
4194 | |
4195 } // namespace internal | |
4196 } // namespace v8 | |
4197 | |
4198 #endif // V8_TARGET_ARCH_ARM64 | |
OLD | NEW |