OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #if V8_TARGET_ARCH_ARM64 | |
6 | |
7 #include <cmath> | |
8 #include "src/arm64/simulator-arm64.h" | |
bbudge
2017/01/31 01:41:32
Did you pull this out of simulator-arm64.cc becaus
martyn.capewell
2017/02/03 11:01:31
This file contains the more complicated simulator
bbudge
2017/02/08 01:39:11
That's fine.
martyn.capewell
2017/02/15 11:51:00
Done.
| |
9 | |
10 namespace v8 { | |
11 namespace internal { | |
12 | |
13 #if defined(USE_SIMULATOR) | |
14 | |
15 template <> | |
16 double Simulator::FPDefaultNaN<double>() { | |
17 return kFP64DefaultNaN; | |
18 } | |
bbudge
2017/01/31 01:41:32
Why not define these inline in the header, simulat
martyn.capewell
2017/02/03 11:01:31
Done.
| |
19 | |
20 template <> | |
21 float Simulator::FPDefaultNaN<float>() { | |
22 return kFP32DefaultNaN; | |
23 } | |
24 | |
25 namespace { | |
26 | |
27 // See FPRound for a description of this function. | |
28 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa, | |
29 FPRounding round_mode) { | |
30 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>( | |
31 sign, exponent, mantissa, round_mode); | |
32 return bit_cast<double>(bits); | |
33 } | |
34 | |
35 // See FPRound for a description of this function. | |
36 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa, | |
37 FPRounding round_mode) { | |
38 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>( | |
39 sign, exponent, mantissa, round_mode); | |
40 return bit_cast<float>(bits); | |
41 } | |
42 | |
43 // See FPRound for a description of this function. | |
44 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent, | |
45 uint64_t mantissa, FPRounding round_mode) { | |
46 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>( | |
47 sign, exponent, mantissa, round_mode); | |
48 } | |
49 | |
50 } // anonymous namespace | |
bbudge
2017/01/31 01:41:32
nit: just 'namespace'
martyn.capewell
2017/02/03 11:01:31
Done.
| |
51 | |
52 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { | |
53 if (src >= 0) { | |
54 return UFixedToDouble(src, fbits, round); | |
55 } else if (src == INT64_MIN) { | |
56 return -UFixedToDouble(src, fbits, round); | |
57 } else { | |
58 return -UFixedToDouble(-src, fbits, round); | |
59 } | |
60 } | |
61 | |
62 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { | |
63 // An input of 0 is a special case because the result is effectively | |
64 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. | |
65 if (src == 0) { | |
66 return 0.0; | |
67 } | |
68 | |
69 // Calculate the exponent. The highest significant bit will have the value | |
70 // 2^exponent. | |
71 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); | |
72 const int64_t exponent = highest_significant_bit - fbits; | |
73 | |
74 return FPRoundToDouble(0, exponent, src, round); | |
75 } | |
76 | |
77 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { | |
78 if (src >= 0) { | |
79 return UFixedToFloat(src, fbits, round); | |
80 } else if (src == INT64_MIN) { | |
81 return -UFixedToFloat(src, fbits, round); | |
82 } else { | |
83 return -UFixedToFloat(-src, fbits, round); | |
84 } | |
85 } | |
86 | |
87 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { | |
88 // An input of 0 is a special case because the result is effectively | |
89 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. | |
90 if (src == 0) { | |
91 return 0.0f; | |
92 } | |
93 | |
94 // Calculate the exponent. The highest significant bit will have the value | |
95 // 2^exponent. | |
96 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); | |
97 const int32_t exponent = highest_significant_bit - fbits; | |
98 | |
99 return FPRoundToFloat(0, exponent, src, round); | |
100 } | |
101 | |
102 double Simulator::FPToDouble(float value) { | |
103 switch (std::fpclassify(value)) { | |
104 case FP_NAN: { | |
105 if (IsSignallingNaN(value)) { | |
106 FPProcessException(); | |
107 } | |
108 if (DN()) return kFP64DefaultNaN; | |
109 | |
110 // Convert NaNs as the processor would: | |
111 // - The sign is propagated. | |
112 // - The payload (mantissa) is transferred entirely, except that the top | |
bbudge
2017/01/31 01:41:32
s/payload/mantissa ?
martyn.capewell
2017/02/03 11:01:31
Not sure what you mean here. In the context of a N
bbudge
2017/02/08 01:39:11
For consistency. It looks like 'mantissa' is used
martyn.capewell
2017/02/15 11:51:00
Done.
| |
113 // bit is forced to '1', making the result a quiet NaN. The unused | |
114 // (low-order) payload bits are set to 0. | |
115 uint32_t raw = bit_cast<uint32_t>(value); | |
116 | |
117 uint64_t sign = raw >> 31; | |
118 uint64_t exponent = (1 << kDoubleExponentBits) - 1; | |
119 uint64_t payload = unsigned_bitextract_64(21, 0, raw); | |
120 | |
121 // Unused low-order bits remain zero. | |
122 payload <<= (kDoubleMantissaBits - kFloatMantissaBits); | |
123 | |
124 // Force a quiet NaN. | |
125 payload |= (UINT64_C(1) << (kDoubleMantissaBits - 1)); | |
126 | |
127 return double_pack(sign, exponent, payload); | |
128 } | |
129 | |
130 case FP_ZERO: | |
131 case FP_NORMAL: | |
132 case FP_SUBNORMAL: | |
133 case FP_INFINITE: { | |
134 // All other inputs are preserved in a standard cast, because every value | |
135 // representable using an IEEE-754 float is also representable using an | |
136 // IEEE-754 double. | |
137 return static_cast<double>(value); | |
138 } | |
139 } | |
140 | |
141 UNREACHABLE(); | |
142 return kFP64DefaultNaN; | |
143 } | |
144 | |
145 float Simulator::FPToFloat(float16 value) { | |
146 uint32_t sign = value >> 15; | |
147 uint32_t exponent = | |
148 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1, | |
149 kFloat16MantissaBits, value); | |
150 uint32_t mantissa = | |
151 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value); | |
152 | |
153 switch (float16classify(value)) { | |
154 case FP_ZERO: | |
155 return (sign == 0) ? 0.0f : -0.0f; | |
156 | |
157 case FP_INFINITE: | |
158 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; | |
159 | |
160 case FP_SUBNORMAL: { | |
161 // Calculate shift required to put mantissa into the most-significant bits | |
162 // of the destination mantissa. | |
163 int shift = CountLeadingZeros(mantissa << (32 - 10), 32); | |
164 | |
165 // Shift mantissa and discard implicit '1'. | |
166 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; | |
167 mantissa &= (1 << kFloatMantissaBits) - 1; | |
168 | |
169 // Adjust the exponent for the shift applied, and rebias. | |
170 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias); | |
171 break; | |
172 } | |
173 | |
174 case FP_NAN: { | |
175 if (IsSignallingNaN(value)) { | |
176 FPProcessException(); | |
177 } | |
178 if (DN()) return kFP32DefaultNaN; | |
179 | |
180 // Convert NaNs as the processor would: | |
181 // - The sign is propagated. | |
182 // - The payload (mantissa) is transferred entirely, except that the top | |
183 // bit is forced to '1', making the result a quiet NaN. The unused | |
184 // (low-order) payload bits are set to 0. | |
185 exponent = (1 << kFloatExponentBits) - 1; | |
186 | |
187 // Increase bits in mantissa, making low-order bits 0. | |
188 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); | |
189 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN. | |
190 break; | |
191 } | |
192 | |
193 case FP_NORMAL: { | |
194 // Increase bits in mantissa, making low-order bits 0. | |
195 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); | |
196 | |
197 // Change exponent bias. | |
198 exponent += (kFloatExponentBias - kFloat16ExponentBias); | |
199 break; | |
200 } | |
201 | |
202 default: | |
203 UNREACHABLE(); | |
204 return kFP32DefaultNaN; | |
205 } | |
206 return float_pack(sign, exponent, mantissa); | |
207 } | |
208 | |
209 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { | |
210 // Only the FPTieEven rounding mode is implemented. | |
211 DCHECK_EQ(round_mode, FPTieEven); | |
212 USE(round_mode); | |
213 | |
214 int64_t sign = float_sign(value); | |
215 int64_t exponent = | |
216 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias; | |
217 uint32_t mantissa = float_mantissa(value); | |
218 | |
219 switch (std::fpclassify(value)) { | |
220 case FP_NAN: { | |
221 if (IsSignallingNaN(value)) { | |
222 FPProcessException(); | |
223 } | |
224 if (DN()) return kFP16DefaultNaN; | |
225 | |
226 // Convert NaNs as the processor would: | |
227 // - The sign is propagated. | |
228 // - The payload (mantissa) is transferred as much as possible, except | |
229 // that the top bit is forced to '1', making the result a quiet NaN. | |
230 float16 result = | |
231 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
232 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); | |
233 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; | |
234 return result; | |
235 } | |
236 | |
237 case FP_ZERO: | |
238 return (sign == 0) ? 0 : 0x8000; | |
239 | |
240 case FP_INFINITE: | |
241 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
242 | |
243 case FP_NORMAL: | |
244 case FP_SUBNORMAL: { | |
245 // Convert float-to-half as the processor would, assuming that FPCR.FZ | |
246 // (flush-to-zero) is not set. | |
247 | |
248 // Add the implicit '1' bit to the mantissa. | |
249 mantissa += (1 << kFloatMantissaBits); | |
250 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); | |
251 } | |
252 } | |
253 | |
254 UNREACHABLE(); | |
255 return kFP16DefaultNaN; | |
256 } | |
257 | |
258 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { | |
259 // Only the FPTieEven rounding mode is implemented. | |
260 DCHECK_EQ(round_mode, FPTieEven); | |
261 USE(round_mode); | |
262 | |
263 int64_t sign = double_sign(value); | |
264 int64_t exponent = | |
265 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; | |
266 uint64_t mantissa = double_mantissa(value); | |
267 | |
268 switch (std::fpclassify(value)) { | |
269 case FP_NAN: { | |
270 if (IsSignallingNaN(value)) { | |
271 FPProcessException(); | |
272 } | |
273 if (DN()) return kFP16DefaultNaN; | |
274 | |
275 // Convert NaNs as the processor would: | |
276 // - The sign is propagated. | |
277 // - The payload (mantissa) is transferred as much as possible, except | |
278 // that the top bit is forced to '1', making the result a quiet NaN. | |
279 float16 result = | |
280 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
281 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); | |
282 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; | |
283 return result; | |
284 } | |
285 | |
286 case FP_ZERO: | |
287 return (sign == 0) ? 0 : 0x8000; | |
288 | |
289 case FP_INFINITE: | |
290 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; | |
291 | |
292 case FP_NORMAL: | |
293 case FP_SUBNORMAL: { | |
294 // Convert double-to-half as the processor would, assuming that FPCR.FZ | |
295 // (flush-to-zero) is not set. | |
296 | |
297 // Add the implicit '1' bit to the mantissa. | |
298 mantissa += (UINT64_C(1) << kDoubleMantissaBits); | |
299 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); | |
300 } | |
301 } | |
302 | |
303 UNREACHABLE(); | |
304 return kFP16DefaultNaN; | |
305 } | |
306 | |
307 float Simulator::FPToFloat(double value, FPRounding round_mode) { | |
308 // Only the FPTieEven rounding mode is implemented. | |
309 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); | |
310 USE(round_mode); | |
311 | |
312 switch (std::fpclassify(value)) { | |
313 case FP_NAN: { | |
314 if (IsSignallingNaN(value)) { | |
315 FPProcessException(); | |
316 } | |
317 if (DN()) return kFP32DefaultNaN; | |
318 | |
319 // Convert NaNs as the processor would: | |
320 // - The sign is propagated. | |
321 // - The payload (mantissa) is transferred as much as possible, except | |
322 // that the top bit is forced to '1', making the result a quiet NaN. | |
bbudge
2017/01/31 01:41:32
s/payload/mantissa
martyn.capewell
2017/02/15 11:51:02
Done.
| |
323 | |
324 uint64_t raw = bit_cast<uint64_t>(value); | |
325 | |
326 uint32_t sign = raw >> 63; | |
327 uint32_t exponent = (1 << 8) - 1; | |
328 uint32_t payload = static_cast<uint32_t>(unsigned_bitextract_64( | |
329 50, kDoubleMantissaBits - kFloatMantissaBits, raw)); | |
330 payload |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN. | |
331 | |
332 return float_pack(sign, exponent, payload); | |
333 } | |
334 | |
335 case FP_ZERO: | |
336 case FP_INFINITE: { | |
337 // In a C++ cast, any value representable in the target type will be | |
338 // unchanged. This is always the case for +/-0.0 and infinities. | |
339 return static_cast<float>(value); | |
340 } | |
341 | |
342 case FP_NORMAL: | |
343 case FP_SUBNORMAL: { | |
344 // Convert double-to-float as the processor would, assuming that FPCR.FZ | |
345 // (flush-to-zero) is not set. | |
346 uint32_t sign = double_sign(value); | |
347 int64_t exponent = | |
348 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; | |
349 uint64_t mantissa = double_mantissa(value); | |
350 if (std::fpclassify(value) == FP_NORMAL) { | |
351 // For normal FP values, add the hidden bit. | |
352 mantissa |= (UINT64_C(1) << kDoubleMantissaBits); | |
353 } | |
354 return FPRoundToFloat(sign, exponent, mantissa, round_mode); | |
355 } | |
356 } | |
357 | |
358 UNREACHABLE(); | |
359 return kFP32DefaultNaN; | |
360 } | |
361 | |
362 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { | |
363 dst.ClearForWrite(vform); | |
364 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
365 dst.ReadUintFromMem(vform, i, addr); | |
366 addr += LaneSizeInBytesFromFormat(vform); | |
367 } | |
368 } | |
369 | |
370 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index, | |
371 uint64_t addr) { | |
372 dst.ReadUintFromMem(vform, index, addr); | |
373 } | |
374 | |
375 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { | |
376 dst.ClearForWrite(vform); | |
377 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
378 dst.ReadUintFromMem(vform, i, addr); | |
379 } | |
380 } | |
381 | |
382 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, | |
383 LogicVRegister dst2, uint64_t addr1) { | |
384 dst1.ClearForWrite(vform); | |
385 dst2.ClearForWrite(vform); | |
386 int esize = LaneSizeInBytesFromFormat(vform); | |
387 uint64_t addr2 = addr1 + esize; | |
388 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
389 dst1.ReadUintFromMem(vform, i, addr1); | |
390 dst2.ReadUintFromMem(vform, i, addr2); | |
391 addr1 += 2 * esize; | |
392 addr2 += 2 * esize; | |
393 } | |
394 } | |
395 | |
396 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, | |
397 LogicVRegister dst2, int index, uint64_t addr1) { | |
398 dst1.ClearForWrite(vform); | |
399 dst2.ClearForWrite(vform); | |
400 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); | |
401 dst1.ReadUintFromMem(vform, index, addr1); | |
402 dst2.ReadUintFromMem(vform, index, addr2); | |
403 } | |
404 | |
405 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1, | |
406 LogicVRegister dst2, uint64_t addr) { | |
407 dst1.ClearForWrite(vform); | |
408 dst2.ClearForWrite(vform); | |
409 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); | |
410 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
411 dst1.ReadUintFromMem(vform, i, addr); | |
412 dst2.ReadUintFromMem(vform, i, addr2); | |
413 } | |
414 } | |
415 | |
416 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, | |
417 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) { | |
418 dst1.ClearForWrite(vform); | |
419 dst2.ClearForWrite(vform); | |
420 dst3.ClearForWrite(vform); | |
421 int esize = LaneSizeInBytesFromFormat(vform); | |
422 uint64_t addr2 = addr1 + esize; | |
423 uint64_t addr3 = addr2 + esize; | |
424 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
425 dst1.ReadUintFromMem(vform, i, addr1); | |
426 dst2.ReadUintFromMem(vform, i, addr2); | |
427 dst3.ReadUintFromMem(vform, i, addr3); | |
428 addr1 += 3 * esize; | |
429 addr2 += 3 * esize; | |
430 addr3 += 3 * esize; | |
431 } | |
432 } | |
433 | |
434 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, | |
435 LogicVRegister dst2, LogicVRegister dst3, int index, | |
436 uint64_t addr1) { | |
437 dst1.ClearForWrite(vform); | |
438 dst2.ClearForWrite(vform); | |
439 dst3.ClearForWrite(vform); | |
440 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); | |
441 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
442 dst1.ReadUintFromMem(vform, index, addr1); | |
443 dst2.ReadUintFromMem(vform, index, addr2); | |
444 dst3.ReadUintFromMem(vform, index, addr3); | |
445 } | |
446 | |
447 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1, | |
448 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) { | |
449 dst1.ClearForWrite(vform); | |
450 dst2.ClearForWrite(vform); | |
451 dst3.ClearForWrite(vform); | |
452 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); | |
453 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
454 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
455 dst1.ReadUintFromMem(vform, i, addr); | |
456 dst2.ReadUintFromMem(vform, i, addr2); | |
457 dst3.ReadUintFromMem(vform, i, addr3); | |
458 } | |
459 } | |
460 | |
461 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, | |
462 LogicVRegister dst2, LogicVRegister dst3, | |
463 LogicVRegister dst4, uint64_t addr1) { | |
464 dst1.ClearForWrite(vform); | |
465 dst2.ClearForWrite(vform); | |
466 dst3.ClearForWrite(vform); | |
467 dst4.ClearForWrite(vform); | |
468 int esize = LaneSizeInBytesFromFormat(vform); | |
469 uint64_t addr2 = addr1 + esize; | |
470 uint64_t addr3 = addr2 + esize; | |
471 uint64_t addr4 = addr3 + esize; | |
472 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
473 dst1.ReadUintFromMem(vform, i, addr1); | |
474 dst2.ReadUintFromMem(vform, i, addr2); | |
475 dst3.ReadUintFromMem(vform, i, addr3); | |
476 dst4.ReadUintFromMem(vform, i, addr4); | |
477 addr1 += 4 * esize; | |
478 addr2 += 4 * esize; | |
479 addr3 += 4 * esize; | |
480 addr4 += 4 * esize; | |
481 } | |
482 } | |
483 | |
484 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, | |
485 LogicVRegister dst2, LogicVRegister dst3, | |
486 LogicVRegister dst4, int index, uint64_t addr1) { | |
487 dst1.ClearForWrite(vform); | |
488 dst2.ClearForWrite(vform); | |
489 dst3.ClearForWrite(vform); | |
490 dst4.ClearForWrite(vform); | |
491 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); | |
492 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
493 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); | |
494 dst1.ReadUintFromMem(vform, index, addr1); | |
495 dst2.ReadUintFromMem(vform, index, addr2); | |
496 dst3.ReadUintFromMem(vform, index, addr3); | |
497 dst4.ReadUintFromMem(vform, index, addr4); | |
498 } | |
499 | |
500 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1, | |
501 LogicVRegister dst2, LogicVRegister dst3, | |
502 LogicVRegister dst4, uint64_t addr) { | |
503 dst1.ClearForWrite(vform); | |
504 dst2.ClearForWrite(vform); | |
505 dst3.ClearForWrite(vform); | |
506 dst4.ClearForWrite(vform); | |
507 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); | |
508 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); | |
509 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); | |
510 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
511 dst1.ReadUintFromMem(vform, i, addr); | |
512 dst2.ReadUintFromMem(vform, i, addr2); | |
513 dst3.ReadUintFromMem(vform, i, addr3); | |
514 dst4.ReadUintFromMem(vform, i, addr4); | |
515 } | |
516 } | |
517 | |
518 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { | |
519 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
520 src.WriteUintToMem(vform, i, addr); | |
521 addr += LaneSizeInBytesFromFormat(vform); | |
522 } | |
523 } | |
524 | |
525 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index, | |
526 uint64_t addr) { | |
527 src.WriteUintToMem(vform, index, addr); | |
528 } | |
529 | |
530 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
531 uint64_t addr) { | |
532 int esize = LaneSizeInBytesFromFormat(vform); | |
533 uint64_t addr2 = addr + esize; | |
534 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
535 dst.WriteUintToMem(vform, i, addr); | |
536 dst2.WriteUintToMem(vform, i, addr2); | |
537 addr += 2 * esize; | |
538 addr2 += 2 * esize; | |
539 } | |
540 } | |
541 | |
542 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
543 int index, uint64_t addr) { | |
544 int esize = LaneSizeInBytesFromFormat(vform); | |
545 dst.WriteUintToMem(vform, index, addr); | |
546 dst2.WriteUintToMem(vform, index, addr + 1 * esize); | |
547 } | |
548 | |
549 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
550 LogicVRegister dst3, uint64_t addr) { | |
551 int esize = LaneSizeInBytesFromFormat(vform); | |
552 uint64_t addr2 = addr + esize; | |
553 uint64_t addr3 = addr2 + esize; | |
554 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
555 dst.WriteUintToMem(vform, i, addr); | |
556 dst2.WriteUintToMem(vform, i, addr2); | |
557 dst3.WriteUintToMem(vform, i, addr3); | |
558 addr += 3 * esize; | |
559 addr2 += 3 * esize; | |
560 addr3 += 3 * esize; | |
561 } | |
562 } | |
563 | |
564 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
565 LogicVRegister dst3, int index, uint64_t addr) { | |
566 int esize = LaneSizeInBytesFromFormat(vform); | |
567 dst.WriteUintToMem(vform, index, addr); | |
568 dst2.WriteUintToMem(vform, index, addr + 1 * esize); | |
569 dst3.WriteUintToMem(vform, index, addr + 2 * esize); | |
570 } | |
571 | |
572 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
573 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) { | |
574 int esize = LaneSizeInBytesFromFormat(vform); | |
575 uint64_t addr2 = addr + esize; | |
576 uint64_t addr3 = addr2 + esize; | |
577 uint64_t addr4 = addr3 + esize; | |
578 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
579 dst.WriteUintToMem(vform, i, addr); | |
580 dst2.WriteUintToMem(vform, i, addr2); | |
581 dst3.WriteUintToMem(vform, i, addr3); | |
582 dst4.WriteUintToMem(vform, i, addr4); | |
583 addr += 4 * esize; | |
584 addr2 += 4 * esize; | |
585 addr3 += 4 * esize; | |
586 addr4 += 4 * esize; | |
587 } | |
588 } | |
589 | |
590 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, | |
591 LogicVRegister dst3, LogicVRegister dst4, int index, | |
592 uint64_t addr) { | |
593 int esize = LaneSizeInBytesFromFormat(vform); | |
594 dst.WriteUintToMem(vform, index, addr); | |
595 dst2.WriteUintToMem(vform, index, addr + 1 * esize); | |
596 dst3.WriteUintToMem(vform, index, addr + 2 * esize); | |
597 dst4.WriteUintToMem(vform, index, addr + 3 * esize); | |
598 } | |
599 | |
600 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, | |
601 const LogicVRegister& src1, | |
602 const LogicVRegister& src2, Condition cond) { | |
603 dst.ClearForWrite(vform); | |
604 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
605 int64_t sa = src1.Int(vform, i); | |
606 int64_t sb = src2.Int(vform, i); | |
607 uint64_t ua = src1.Uint(vform, i); | |
608 uint64_t ub = src2.Uint(vform, i); | |
609 bool result = false; | |
610 switch (cond) { | |
611 case eq: | |
612 result = (ua == ub); | |
613 break; | |
614 case ge: | |
615 result = (sa >= sb); | |
616 break; | |
617 case gt: | |
618 result = (sa > sb); | |
619 break; | |
620 case hi: | |
621 result = (ua > ub); | |
622 break; | |
623 case hs: | |
624 result = (ua >= ub); | |
625 break; | |
626 case lt: | |
627 result = (sa < sb); | |
628 break; | |
629 case le: | |
630 result = (sa <= sb); | |
631 break; | |
632 default: | |
633 UNREACHABLE(); | |
634 break; | |
635 } | |
636 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); | |
637 } | |
638 return dst; | |
639 } | |
640 | |
641 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, | |
642 const LogicVRegister& src1, int imm, | |
643 Condition cond) { | |
644 SimVRegister temp; | |
645 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); | |
646 return cmp(vform, dst, src1, imm_reg, cond); | |
647 } | |
648 | |
649 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst, | |
650 const LogicVRegister& src1, | |
651 const LogicVRegister& src2) { | |
652 dst.ClearForWrite(vform); | |
653 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
654 uint64_t ua = src1.Uint(vform, i); | |
655 uint64_t ub = src2.Uint(vform, i); | |
656 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); | |
657 } | |
658 return dst; | |
659 } | |
660 | |
661 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst, | |
662 const LogicVRegister& src1, | |
663 const LogicVRegister& src2) { | |
664 int lane_size = LaneSizeInBitsFromFormat(vform); | |
665 dst.ClearForWrite(vform); | |
666 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
667 // Test for unsigned saturation. | |
668 uint64_t ua = src1.UintLeftJustified(vform, i); | |
669 uint64_t ub = src2.UintLeftJustified(vform, i); | |
670 uint64_t ur = ua + ub; | |
671 if (ur < ua) { | |
672 dst.SetUnsignedSat(i, true); | |
673 } | |
674 | |
675 // Test for signed saturation. | |
676 bool pos_a = (ua >> 63) == 0; | |
677 bool pos_b = (ub >> 63) == 0; | |
678 bool pos_r = (ur >> 63) == 0; | |
679 // If the signs of the operands are the same, but different from the result, | |
680 // there was an overflow. | |
681 if ((pos_a == pos_b) && (pos_a != pos_r)) { | |
682 dst.SetSignedSat(i, pos_a); | |
683 } | |
684 | |
685 dst.SetInt(vform, i, ur >> (64 - lane_size)); | |
686 } | |
687 return dst; | |
688 } | |
689 | |
690 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, | |
691 const LogicVRegister& src1, | |
692 const LogicVRegister& src2) { | |
693 SimVRegister temp1, temp2; | |
694 uzp1(vform, temp1, src1, src2); | |
695 uzp2(vform, temp2, src1, src2); | |
696 add(vform, dst, temp1, temp2); | |
697 return dst; | |
698 } | |
699 | |
700 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, | |
701 const LogicVRegister& src1, | |
702 const LogicVRegister& src2) { | |
703 SimVRegister temp; | |
704 mul(vform, temp, src1, src2); | |
705 add(vform, dst, dst, temp); | |
706 return dst; | |
707 } | |
708 | |
709 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, | |
710 const LogicVRegister& src1, | |
711 const LogicVRegister& src2) { | |
712 SimVRegister temp; | |
713 mul(vform, temp, src1, src2); | |
714 sub(vform, dst, dst, temp); | |
715 return dst; | |
716 } | |
717 | |
718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, | |
719 const LogicVRegister& src1, | |
720 const LogicVRegister& src2) { | |
721 dst.ClearForWrite(vform); | |
722 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
723 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); | |
724 } | |
725 return dst; | |
726 } | |
727 | |
728 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, | |
729 const LogicVRegister& src1, | |
730 const LogicVRegister& src2, int index) { | |
731 SimVRegister temp; | |
732 VectorFormat indexform = VectorFormatFillQ(vform); | |
733 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
734 } | |
735 | |
736 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, | |
737 const LogicVRegister& src1, | |
738 const LogicVRegister& src2, int index) { | |
739 SimVRegister temp; | |
740 VectorFormat indexform = VectorFormatFillQ(vform); | |
741 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
742 } | |
743 | |
744 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, | |
745 const LogicVRegister& src1, | |
746 const LogicVRegister& src2, int index) { | |
747 SimVRegister temp; | |
748 VectorFormat indexform = VectorFormatFillQ(vform); | |
749 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
750 } | |
751 | |
752 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, | |
753 const LogicVRegister& src1, | |
754 const LogicVRegister& src2, int index) { | |
755 SimVRegister temp; | |
756 VectorFormat indexform = | |
757 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
758 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
759 } | |
760 | |
761 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, | |
762 const LogicVRegister& src1, | |
763 const LogicVRegister& src2, int index) { | |
764 SimVRegister temp; | |
765 VectorFormat indexform = | |
766 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
767 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
768 } | |
769 | |
770 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, | |
771 const LogicVRegister& src1, | |
772 const LogicVRegister& src2, int index) { | |
773 SimVRegister temp; | |
774 VectorFormat indexform = | |
775 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
776 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
777 } | |
778 | |
779 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, | |
780 const LogicVRegister& src1, | |
781 const LogicVRegister& src2, int index) { | |
782 SimVRegister temp; | |
783 VectorFormat indexform = | |
784 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
785 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
786 } | |
787 | |
788 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, | |
789 const LogicVRegister& src1, | |
790 const LogicVRegister& src2, int index) { | |
791 SimVRegister temp; | |
792 VectorFormat indexform = | |
793 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
794 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
795 } | |
796 | |
797 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, | |
798 const LogicVRegister& src1, | |
799 const LogicVRegister& src2, int index) { | |
800 SimVRegister temp; | |
801 VectorFormat indexform = | |
802 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
803 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
804 } | |
805 | |
806 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, | |
807 const LogicVRegister& src1, | |
808 const LogicVRegister& src2, int index) { | |
809 SimVRegister temp; | |
810 VectorFormat indexform = | |
811 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
812 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
813 } | |
814 | |
815 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, | |
816 const LogicVRegister& src1, | |
817 const LogicVRegister& src2, int index) { | |
818 SimVRegister temp; | |
819 VectorFormat indexform = | |
820 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
821 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
822 } | |
823 | |
824 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, | |
825 const LogicVRegister& src1, | |
826 const LogicVRegister& src2, int index) { | |
827 SimVRegister temp; | |
828 VectorFormat indexform = | |
829 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
830 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
831 } | |
832 | |
833 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, | |
834 const LogicVRegister& src1, | |
835 const LogicVRegister& src2, int index) { | |
836 SimVRegister temp; | |
837 VectorFormat indexform = | |
838 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
839 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
840 } | |
841 | |
842 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, | |
843 const LogicVRegister& src1, | |
844 const LogicVRegister& src2, int index) { | |
845 SimVRegister temp; | |
846 VectorFormat indexform = | |
847 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
848 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
849 } | |
850 | |
851 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, | |
852 const LogicVRegister& src1, | |
853 const LogicVRegister& src2, int index) { | |
854 SimVRegister temp; | |
855 VectorFormat indexform = | |
856 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
857 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
858 } | |
859 | |
860 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, | |
861 const LogicVRegister& src1, | |
862 const LogicVRegister& src2, int index) { | |
863 SimVRegister temp; | |
864 VectorFormat indexform = | |
865 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
866 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
867 } | |
868 | |
869 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, | |
870 const LogicVRegister& src1, | |
871 const LogicVRegister& src2, int index) { | |
872 SimVRegister temp; | |
873 VectorFormat indexform = | |
874 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
875 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
876 } | |
877 | |
878 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, | |
879 const LogicVRegister& src1, | |
880 const LogicVRegister& src2, int index) { | |
881 SimVRegister temp; | |
882 VectorFormat indexform = | |
883 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
884 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
885 } | |
886 | |
887 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, | |
888 const LogicVRegister& src1, | |
889 const LogicVRegister& src2, int index) { | |
890 SimVRegister temp; | |
891 VectorFormat indexform = | |
892 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
893 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
894 } | |
895 | |
896 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, | |
897 const LogicVRegister& src1, | |
898 const LogicVRegister& src2, int index) { | |
899 SimVRegister temp; | |
900 VectorFormat indexform = | |
901 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
902 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
903 } | |
904 | |
905 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, | |
906 const LogicVRegister& src1, | |
907 const LogicVRegister& src2, int index) { | |
908 SimVRegister temp; | |
909 VectorFormat indexform = | |
910 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); | |
911 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
912 } | |
913 | |
914 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, | |
915 const LogicVRegister& src1, | |
916 const LogicVRegister& src2, int index) { | |
917 SimVRegister temp; | |
918 VectorFormat indexform = VectorFormatFillQ(vform); | |
919 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
920 } | |
921 | |
922 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, | |
923 const LogicVRegister& src1, | |
924 const LogicVRegister& src2, int index) { | |
925 SimVRegister temp; | |
926 VectorFormat indexform = VectorFormatFillQ(vform); | |
927 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); | |
928 } | |
929 | |
930 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { | |
931 uint16_t result = 0; | |
932 uint16_t extended_op2 = op2; | |
933 for (int i = 0; i < 8; ++i) { | |
934 if ((op1 >> i) & 1) { | |
935 result = result ^ (extended_op2 << i); | |
936 } | |
937 } | |
938 return result; | |
939 } | |
940 | |
941 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst, | |
942 const LogicVRegister& src1, | |
943 const LogicVRegister& src2) { | |
944 dst.ClearForWrite(vform); | |
945 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
946 dst.SetUint(vform, i, | |
947 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); | |
948 } | |
949 return dst; | |
950 } | |
951 | |
952 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst, | |
953 const LogicVRegister& src1, | |
954 const LogicVRegister& src2) { | |
955 VectorFormat vform_src = VectorFormatHalfWidth(vform); | |
956 dst.ClearForWrite(vform); | |
957 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
958 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i), | |
959 src2.Uint(vform_src, i))); | |
960 } | |
961 return dst; | |
962 } | |
963 | |
964 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst, | |
965 const LogicVRegister& src1, | |
966 const LogicVRegister& src2) { | |
967 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); | |
968 dst.ClearForWrite(vform); | |
969 int lane_count = LaneCountFromFormat(vform); | |
970 for (int i = 0; i < lane_count; i++) { | |
971 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i), | |
972 src2.Uint(vform_src, lane_count + i))); | |
973 } | |
974 return dst; | |
975 } | |
976 | |
977 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst, | |
978 const LogicVRegister& src1, | |
979 const LogicVRegister& src2) { | |
980 int lane_size = LaneSizeInBitsFromFormat(vform); | |
981 dst.ClearForWrite(vform); | |
982 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
983 // Test for unsigned saturation. | |
984 uint64_t ua = src1.UintLeftJustified(vform, i); | |
985 uint64_t ub = src2.UintLeftJustified(vform, i); | |
986 uint64_t ur = ua - ub; | |
987 if (ub > ua) { | |
988 dst.SetUnsignedSat(i, false); | |
989 } | |
990 | |
991 // Test for signed saturation. | |
992 bool pos_a = (ua >> 63) == 0; | |
993 bool pos_b = (ub >> 63) == 0; | |
994 bool pos_r = (ur >> 63) == 0; | |
995 // If the signs of the operands are different, and the sign of the first | |
996 // operand doesn't match the result, there was an overflow. | |
997 if ((pos_a != pos_b) && (pos_a != pos_r)) { | |
998 dst.SetSignedSat(i, pos_a); | |
999 } | |
1000 | |
1001 dst.SetInt(vform, i, ur >> (64 - lane_size)); | |
1002 } | |
1003 return dst; | |
1004 } | |
1005 | |
1006 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, | |
1007 const LogicVRegister& src1, | |
1008 const LogicVRegister& src2) { | |
1009 dst.ClearForWrite(vform); | |
1010 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1011 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); | |
1012 } | |
1013 return dst; | |
1014 } | |
1015 | |
1016 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, | |
1017 const LogicVRegister& src1, | |
1018 const LogicVRegister& src2) { | |
1019 dst.ClearForWrite(vform); | |
1020 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1021 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); | |
1022 } | |
1023 return dst; | |
1024 } | |
1025 | |
1026 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst, | |
1027 const LogicVRegister& src1, | |
1028 const LogicVRegister& src2) { | |
1029 dst.ClearForWrite(vform); | |
1030 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1031 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); | |
1032 } | |
1033 return dst; | |
1034 } | |
1035 | |
1036 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst, | |
1037 const LogicVRegister& src1, | |
1038 const LogicVRegister& src2) { | |
1039 dst.ClearForWrite(vform); | |
1040 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1041 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); | |
1042 } | |
1043 return dst; | |
1044 } | |
1045 | |
1046 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, | |
1047 const LogicVRegister& src1, | |
1048 const LogicVRegister& src2) { | |
1049 dst.ClearForWrite(vform); | |
1050 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1051 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); | |
1052 } | |
1053 return dst; | |
1054 } | |
1055 | |
1056 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, | |
1057 const LogicVRegister& src, uint64_t imm) { | |
1058 uint64_t result[16]; | |
1059 int laneCount = LaneCountFromFormat(vform); | |
1060 for (int i = 0; i < laneCount; ++i) { | |
1061 result[i] = src.Uint(vform, i) & ~imm; | |
1062 } | |
1063 dst.SetUintArray(vform, result); | |
1064 return dst; | |
1065 } | |
1066 | |
1067 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst, | |
1068 const LogicVRegister& src1, | |
1069 const LogicVRegister& src2) { | |
1070 dst.ClearForWrite(vform); | |
1071 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1072 uint64_t operand1 = dst.Uint(vform, i); | |
1073 uint64_t operand2 = ~src2.Uint(vform, i); | |
1074 uint64_t operand3 = src1.Uint(vform, i); | |
1075 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); | |
1076 dst.SetUint(vform, i, result); | |
1077 } | |
1078 return dst; | |
1079 } | |
1080 | |
1081 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst, | |
1082 const LogicVRegister& src1, | |
1083 const LogicVRegister& src2) { | |
1084 dst.ClearForWrite(vform); | |
1085 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1086 uint64_t operand1 = dst.Uint(vform, i); | |
1087 uint64_t operand2 = src2.Uint(vform, i); | |
1088 uint64_t operand3 = src1.Uint(vform, i); | |
1089 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); | |
1090 dst.SetUint(vform, i, result); | |
1091 } | |
1092 return dst; | |
1093 } | |
1094 | |
1095 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst, | |
1096 const LogicVRegister& src1, | |
1097 const LogicVRegister& src2) { | |
1098 dst.ClearForWrite(vform); | |
1099 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1100 uint64_t operand1 = src2.Uint(vform, i); | |
1101 uint64_t operand2 = dst.Uint(vform, i); | |
1102 uint64_t operand3 = src1.Uint(vform, i); | |
1103 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); | |
1104 dst.SetUint(vform, i, result); | |
1105 } | |
1106 return dst; | |
1107 } | |
1108 | |
1109 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst, | |
1110 const LogicVRegister& src1, | |
1111 const LogicVRegister& src2, bool max) { | |
1112 dst.ClearForWrite(vform); | |
1113 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1114 int64_t src1_val = src1.Int(vform, i); | |
1115 int64_t src2_val = src2.Int(vform, i); | |
1116 int64_t dst_val; | |
1117 if (max) { | |
1118 dst_val = (src1_val > src2_val) ? src1_val : src2_val; | |
1119 } else { | |
1120 dst_val = (src1_val < src2_val) ? src1_val : src2_val; | |
1121 } | |
1122 dst.SetInt(vform, i, dst_val); | |
1123 } | |
1124 return dst; | |
1125 } | |
1126 | |
1127 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst, | |
1128 const LogicVRegister& src1, | |
1129 const LogicVRegister& src2) { | |
1130 return SMinMax(vform, dst, src1, src2, true); | |
1131 } | |
1132 | |
1133 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst, | |
1134 const LogicVRegister& src1, | |
1135 const LogicVRegister& src2) { | |
1136 return SMinMax(vform, dst, src1, src2, false); | |
1137 } | |
1138 | |
1139 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst, | |
1140 const LogicVRegister& src1, | |
1141 const LogicVRegister& src2, bool max) { | |
1142 int lanes = LaneCountFromFormat(vform); | |
1143 int64_t result[kMaxLanesPerVector]; | |
1144 const LogicVRegister* src = &src1; | |
1145 for (int j = 0; j < 2; j++) { | |
1146 for (int i = 0; i < lanes; i += 2) { | |
1147 int64_t first_val = src->Int(vform, i); | |
1148 int64_t second_val = src->Int(vform, i + 1); | |
1149 int64_t dst_val; | |
1150 if (max) { | |
1151 dst_val = (first_val > second_val) ? first_val : second_val; | |
1152 } else { | |
1153 dst_val = (first_val < second_val) ? first_val : second_val; | |
1154 } | |
1155 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); | |
1156 result[(i >> 1) + (j * lanes / 2)] = dst_val; | |
1157 } | |
1158 src = &src2; | |
1159 } | |
1160 dst.SetIntArray(vform, result); | |
1161 return dst; | |
1162 } | |
1163 | |
1164 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst, | |
1165 const LogicVRegister& src1, | |
1166 const LogicVRegister& src2) { | |
1167 return SMinMaxP(vform, dst, src1, src2, true); | |
1168 } | |
1169 | |
1170 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst, | |
1171 const LogicVRegister& src1, | |
1172 const LogicVRegister& src2) { | |
1173 return SMinMaxP(vform, dst, src1, src2, false); | |
1174 } | |
1175 | |
1176 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, | |
1177 const LogicVRegister& src) { | |
1178 DCHECK_EQ(vform, kFormatD); | |
1179 | |
1180 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); | |
1181 dst.ClearForWrite(vform); | |
1182 dst.SetUint(vform, 0, dst_val); | |
1183 return dst; | |
1184 } | |
1185 | |
1186 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst, | |
1187 const LogicVRegister& src) { | |
1188 VectorFormat vform_dst = | |
1189 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); | |
1190 | |
1191 int64_t dst_val = 0; | |
1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1193 dst_val += src.Int(vform, i); | |
1194 } | |
1195 | |
1196 dst.ClearForWrite(vform_dst); | |
1197 dst.SetInt(vform_dst, 0, dst_val); | |
1198 return dst; | |
1199 } | |
1200 | |
1201 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst, | |
1202 const LogicVRegister& src) { | |
1203 VectorFormat vform_dst = | |
1204 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); | |
1205 | |
1206 int64_t dst_val = 0; | |
1207 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1208 dst_val += src.Int(vform, i); | |
1209 } | |
1210 | |
1211 dst.ClearForWrite(vform_dst); | |
1212 dst.SetInt(vform_dst, 0, dst_val); | |
1213 return dst; | |
1214 } | |
1215 | |
1216 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst, | |
1217 const LogicVRegister& src) { | |
1218 VectorFormat vform_dst = | |
1219 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); | |
1220 | |
1221 uint64_t dst_val = 0; | |
1222 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1223 dst_val += src.Uint(vform, i); | |
1224 } | |
1225 | |
1226 dst.ClearForWrite(vform_dst); | |
1227 dst.SetUint(vform_dst, 0, dst_val); | |
1228 return dst; | |
1229 } | |
1230 | |
1231 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst, | |
1232 const LogicVRegister& src, bool max) { | |
1233 int64_t dst_val = max ? INT64_MIN : INT64_MAX; | |
1234 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1235 int64_t src_val = src.Int(vform, i); | |
1236 if (max) { | |
1237 dst_val = (src_val > dst_val) ? src_val : dst_val; | |
1238 } else { | |
1239 dst_val = (src_val < dst_val) ? src_val : dst_val; | |
1240 } | |
1241 } | |
1242 dst.ClearForWrite(ScalarFormatFromFormat(vform)); | |
1243 dst.SetInt(vform, 0, dst_val); | |
1244 return dst; | |
1245 } | |
1246 | |
1247 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, | |
1248 const LogicVRegister& src) { | |
1249 SMinMaxV(vform, dst, src, true); | |
1250 return dst; | |
1251 } | |
1252 | |
1253 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, | |
1254 const LogicVRegister& src) { | |
1255 SMinMaxV(vform, dst, src, false); | |
1256 return dst; | |
1257 } | |
1258 | |
1259 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst, | |
1260 const LogicVRegister& src1, | |
1261 const LogicVRegister& src2, bool max) { | |
1262 dst.ClearForWrite(vform); | |
1263 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1264 uint64_t src1_val = src1.Uint(vform, i); | |
1265 uint64_t src2_val = src2.Uint(vform, i); | |
1266 uint64_t dst_val; | |
1267 if (max) { | |
1268 dst_val = (src1_val > src2_val) ? src1_val : src2_val; | |
1269 } else { | |
1270 dst_val = (src1_val < src2_val) ? src1_val : src2_val; | |
1271 } | |
1272 dst.SetUint(vform, i, dst_val); | |
1273 } | |
1274 return dst; | |
1275 } | |
1276 | |
1277 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst, | |
1278 const LogicVRegister& src1, | |
1279 const LogicVRegister& src2) { | |
1280 return UMinMax(vform, dst, src1, src2, true); | |
1281 } | |
1282 | |
1283 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst, | |
1284 const LogicVRegister& src1, | |
1285 const LogicVRegister& src2) { | |
1286 return UMinMax(vform, dst, src1, src2, false); | |
1287 } | |
1288 | |
1289 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst, | |
1290 const LogicVRegister& src1, | |
1291 const LogicVRegister& src2, bool max) { | |
1292 int lanes = LaneCountFromFormat(vform); | |
1293 uint64_t result[kMaxLanesPerVector]; | |
1294 const LogicVRegister* src = &src1; | |
1295 for (int j = 0; j < 2; j++) { | |
1296 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { | |
1297 uint64_t first_val = src->Uint(vform, i); | |
1298 uint64_t second_val = src->Uint(vform, i + 1); | |
1299 uint64_t dst_val; | |
1300 if (max) { | |
1301 dst_val = (first_val > second_val) ? first_val : second_val; | |
1302 } else { | |
1303 dst_val = (first_val < second_val) ? first_val : second_val; | |
1304 } | |
1305 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); | |
1306 result[(i >> 1) + (j * lanes / 2)] = dst_val; | |
1307 } | |
1308 src = &src2; | |
1309 } | |
1310 dst.SetUintArray(vform, result); | |
1311 return dst; | |
1312 } | |
1313 | |
1314 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst, | |
1315 const LogicVRegister& src1, | |
1316 const LogicVRegister& src2) { | |
1317 return UMinMaxP(vform, dst, src1, src2, true); | |
1318 } | |
1319 | |
1320 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst, | |
1321 const LogicVRegister& src1, | |
1322 const LogicVRegister& src2) { | |
1323 return UMinMaxP(vform, dst, src1, src2, false); | |
1324 } | |
1325 | |
1326 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst, | |
1327 const LogicVRegister& src, bool max) { | |
1328 uint64_t dst_val = max ? 0 : UINT64_MAX; | |
1329 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1330 uint64_t src_val = src.Uint(vform, i); | |
1331 if (max) { | |
1332 dst_val = (src_val > dst_val) ? src_val : dst_val; | |
1333 } else { | |
1334 dst_val = (src_val < dst_val) ? src_val : dst_val; | |
1335 } | |
1336 } | |
1337 dst.ClearForWrite(ScalarFormatFromFormat(vform)); | |
1338 dst.SetUint(vform, 0, dst_val); | |
1339 return dst; | |
1340 } | |
1341 | |
1342 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, | |
1343 const LogicVRegister& src) { | |
1344 UMinMaxV(vform, dst, src, true); | |
1345 return dst; | |
1346 } | |
1347 | |
1348 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, | |
1349 const LogicVRegister& src) { | |
1350 UMinMaxV(vform, dst, src, false); | |
1351 return dst; | |
1352 } | |
1353 | |
1354 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst, | |
1355 const LogicVRegister& src, int shift) { | |
1356 DCHECK_GE(shift, 0); | |
1357 SimVRegister temp; | |
1358 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1359 return ushl(vform, dst, src, shiftreg); | |
1360 } | |
1361 | |
1362 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst, | |
1363 const LogicVRegister& src, int shift) { | |
1364 DCHECK_GE(shift, 0); | |
1365 SimVRegister temp1, temp2; | |
1366 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1367 LogicVRegister extendedreg = sxtl(vform, temp2, src); | |
1368 return sshl(vform, dst, extendedreg, shiftreg); | |
1369 } | |
1370 | |
1371 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst, | |
1372 const LogicVRegister& src, int shift) { | |
1373 DCHECK_GE(shift, 0); | |
1374 SimVRegister temp1, temp2; | |
1375 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1376 LogicVRegister extendedreg = sxtl2(vform, temp2, src); | |
1377 return sshl(vform, dst, extendedreg, shiftreg); | |
1378 } | |
1379 | |
1380 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst, | |
1381 const LogicVRegister& src) { | |
1382 int shift = LaneSizeInBitsFromFormat(vform) / 2; | |
1383 return sshll(vform, dst, src, shift); | |
1384 } | |
1385 | |
1386 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst, | |
1387 const LogicVRegister& src) { | |
1388 int shift = LaneSizeInBitsFromFormat(vform) / 2; | |
1389 return sshll2(vform, dst, src, shift); | |
1390 } | |
1391 | |
1392 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst, | |
1393 const LogicVRegister& src, int shift) { | |
1394 DCHECK_GE(shift, 0); | |
1395 SimVRegister temp1, temp2; | |
1396 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1397 LogicVRegister extendedreg = uxtl(vform, temp2, src); | |
1398 return ushl(vform, dst, extendedreg, shiftreg); | |
1399 } | |
1400 | |
1401 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst, | |
1402 const LogicVRegister& src, int shift) { | |
1403 DCHECK_GE(shift, 0); | |
1404 SimVRegister temp1, temp2; | |
1405 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); | |
1406 LogicVRegister extendedreg = uxtl2(vform, temp2, src); | |
1407 return ushl(vform, dst, extendedreg, shiftreg); | |
1408 } | |
1409 | |
1410 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, | |
1411 const LogicVRegister& src, int shift) { | |
1412 dst.ClearForWrite(vform); | |
1413 int laneCount = LaneCountFromFormat(vform); | |
1414 for (int i = 0; i < laneCount; i++) { | |
1415 uint64_t src_lane = src.Uint(vform, i); | |
1416 uint64_t dst_lane = dst.Uint(vform, i); | |
1417 uint64_t shifted = src_lane << shift; | |
1418 uint64_t mask = MaxUintFromFormat(vform) << shift; | |
1419 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); | |
1420 } | |
1421 return dst; | |
1422 } | |
1423 | |
1424 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst, | |
1425 const LogicVRegister& src, int shift) { | |
1426 DCHECK_GE(shift, 0); | |
1427 SimVRegister temp; | |
1428 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1429 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); | |
1430 } | |
1431 | |
1432 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst, | |
1433 const LogicVRegister& src, int shift) { | |
1434 DCHECK_GE(shift, 0); | |
1435 SimVRegister temp; | |
1436 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1437 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); | |
1438 } | |
1439 | |
1440 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst, | |
1441 const LogicVRegister& src, int shift) { | |
1442 DCHECK_GE(shift, 0); | |
1443 SimVRegister temp; | |
1444 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); | |
1445 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); | |
1446 } | |
1447 | |
1448 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst, | |
1449 const LogicVRegister& src, int shift) { | |
1450 dst.ClearForWrite(vform); | |
1451 int laneCount = LaneCountFromFormat(vform); | |
1452 DCHECK((shift > 0) && | |
1453 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); | |
1454 for (int i = 0; i < laneCount; i++) { | |
1455 uint64_t src_lane = src.Uint(vform, i); | |
1456 uint64_t dst_lane = dst.Uint(vform, i); | |
1457 uint64_t shifted; | |
1458 uint64_t mask; | |
1459 if (shift == 64) { | |
1460 shifted = 0; | |
1461 mask = 0; | |
1462 } else { | |
1463 shifted = src_lane >> shift; | |
1464 mask = MaxUintFromFormat(vform) >> shift; | |
1465 } | |
1466 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); | |
1467 } | |
1468 return dst; | |
1469 } | |
1470 | |
1471 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst, | |
1472 const LogicVRegister& src, int shift) { | |
1473 DCHECK_GE(shift, 0); | |
1474 SimVRegister temp; | |
1475 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); | |
1476 return ushl(vform, dst, src, shiftreg); | |
1477 } | |
1478 | |
1479 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst, | |
1480 const LogicVRegister& src, int shift) { | |
1481 DCHECK_GE(shift, 0); | |
1482 SimVRegister temp; | |
1483 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); | |
1484 return sshl(vform, dst, src, shiftreg); | |
1485 } | |
1486 | |
1487 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst, | |
1488 const LogicVRegister& src, int shift) { | |
1489 SimVRegister temp; | |
1490 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); | |
1491 return add(vform, dst, dst, shifted_reg); | |
1492 } | |
1493 | |
1494 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst, | |
1495 const LogicVRegister& src, int shift) { | |
1496 SimVRegister temp; | |
1497 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); | |
1498 return add(vform, dst, dst, shifted_reg); | |
1499 } | |
1500 | |
1501 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst, | |
1502 const LogicVRegister& src, int shift) { | |
1503 SimVRegister temp; | |
1504 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); | |
1505 return add(vform, dst, dst, shifted_reg); | |
1506 } | |
1507 | |
1508 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst, | |
1509 const LogicVRegister& src, int shift) { | |
1510 SimVRegister temp; | |
1511 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); | |
1512 return add(vform, dst, dst, shifted_reg); | |
1513 } | |
1514 | |
1515 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, | |
1516 const LogicVRegister& src) { | |
1517 uint64_t result[16]; | |
1518 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1519 int laneCount = LaneCountFromFormat(vform); | |
1520 for (int i = 0; i < laneCount; i++) { | |
1521 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); | |
1522 } | |
1523 | |
1524 dst.SetUintArray(vform, result); | |
1525 return dst; | |
1526 } | |
1527 | |
1528 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, | |
1529 const LogicVRegister& src) { | |
1530 uint64_t result[16]; | |
1531 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1532 int laneCount = LaneCountFromFormat(vform); | |
1533 for (int i = 0; i < laneCount; i++) { | |
1534 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); | |
1535 } | |
1536 | |
1537 dst.SetUintArray(vform, result); | |
1538 return dst; | |
1539 } | |
1540 | |
1541 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, | |
1542 const LogicVRegister& src) { | |
1543 uint64_t result[16]; | |
1544 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1545 int laneCount = LaneCountFromFormat(vform); | |
1546 for (int i = 0; i < laneCount; i++) { | |
1547 uint64_t value = src.Uint(vform, i); | |
1548 result[i] = 0; | |
1549 for (int j = 0; j < laneSizeInBits; j++) { | |
1550 result[i] += (value & 1); | |
1551 value >>= 1; | |
1552 } | |
1553 } | |
1554 | |
1555 dst.SetUintArray(vform, result); | |
1556 return dst; | |
1557 } | |
1558 | |
1559 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst, | |
1560 const LogicVRegister& src1, | |
1561 const LogicVRegister& src2) { | |
1562 dst.ClearForWrite(vform); | |
1563 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1564 int8_t shift_val = src2.Int(vform, i); | |
1565 int64_t lj_src_val = src1.IntLeftJustified(vform, i); | |
1566 | |
1567 // Set signed saturation state. | |
1568 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) && | |
1569 (lj_src_val != 0)) { | |
1570 dst.SetSignedSat(i, lj_src_val >= 0); | |
1571 } | |
1572 | |
1573 // Set unsigned saturation state. | |
1574 if (lj_src_val < 0) { | |
1575 dst.SetUnsignedSat(i, false); | |
1576 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && | |
1577 (lj_src_val != 0)) { | |
1578 dst.SetUnsignedSat(i, true); | |
1579 } | |
1580 | |
1581 int64_t src_val = src1.Int(vform, i); | |
1582 bool src_is_negative = src_val < 0; | |
1583 if (shift_val > 63) { | |
1584 dst.SetInt(vform, i, 0); | |
1585 } else if (shift_val < -63) { | |
1586 dst.SetRounding(i, src_is_negative); | |
1587 dst.SetInt(vform, i, src_is_negative ? -1 : 0); | |
1588 } else { | |
1589 // Use unsigned types for shifts, as behaviour is undefined for signed | |
1590 // lhs. | |
1591 uint64_t usrc_val = static_cast<uint64_t>(src_val); | |
1592 | |
1593 if (shift_val < 0) { | |
1594 // Convert to right shift. | |
1595 shift_val = -shift_val; | |
1596 | |
1597 // Set rounding state by testing most-significant bit shifted out. | |
1598 // Rounding only needed on right shifts. | |
1599 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { | |
1600 dst.SetRounding(i, true); | |
1601 } | |
1602 | |
1603 usrc_val >>= shift_val; | |
1604 | |
1605 if (src_is_negative) { | |
1606 // Simulate sign-extension. | |
1607 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); | |
1608 } | |
1609 } else { | |
1610 usrc_val <<= shift_val; | |
1611 } | |
1612 dst.SetUint(vform, i, usrc_val); | |
1613 } | |
1614 } | |
1615 return dst; | |
1616 } | |
1617 | |
1618 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst, | |
1619 const LogicVRegister& src1, | |
1620 const LogicVRegister& src2) { | |
1621 dst.ClearForWrite(vform); | |
1622 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1623 int8_t shift_val = src2.Int(vform, i); | |
1624 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); | |
1625 | |
1626 // Set saturation state. | |
1627 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) { | |
1628 dst.SetUnsignedSat(i, true); | |
1629 } | |
1630 | |
1631 uint64_t src_val = src1.Uint(vform, i); | |
1632 if ((shift_val > 63) || (shift_val < -64)) { | |
1633 dst.SetUint(vform, i, 0); | |
1634 } else { | |
1635 if (shift_val < 0) { | |
1636 // Set rounding state. Rounding only needed on right shifts. | |
1637 if (((src_val >> (-shift_val - 1)) & 1) == 1) { | |
1638 dst.SetRounding(i, true); | |
1639 } | |
1640 | |
1641 if (shift_val == -64) { | |
1642 src_val = 0; | |
1643 } else { | |
1644 src_val >>= -shift_val; | |
1645 } | |
1646 } else { | |
1647 src_val <<= shift_val; | |
1648 } | |
1649 dst.SetUint(vform, i, src_val); | |
1650 } | |
1651 } | |
1652 return dst; | |
1653 } | |
1654 | |
1655 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst, | |
1656 const LogicVRegister& src) { | |
1657 dst.ClearForWrite(vform); | |
1658 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1659 // Test for signed saturation. | |
1660 int64_t sa = src.Int(vform, i); | |
1661 if (sa == MinIntFromFormat(vform)) { | |
1662 dst.SetSignedSat(i, true); | |
1663 } | |
1664 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); | |
1665 } | |
1666 return dst; | |
1667 } | |
1668 | |
1669 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst, | |
1670 const LogicVRegister& src) { | |
1671 dst.ClearForWrite(vform); | |
1672 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1673 int64_t sa = dst.IntLeftJustified(vform, i); | |
1674 uint64_t ub = src.UintLeftJustified(vform, i); | |
1675 uint64_t ur = sa + ub; | |
1676 | |
1677 int64_t sr = bit_cast<int64_t>(ur); | |
1678 if (sr < sa) { // Test for signed positive saturation. | |
1679 dst.SetInt(vform, i, MaxIntFromFormat(vform)); | |
1680 } else { | |
1681 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); | |
1682 } | |
1683 } | |
1684 return dst; | |
1685 } | |
1686 | |
1687 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst, | |
1688 const LogicVRegister& src) { | |
1689 dst.ClearForWrite(vform); | |
1690 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1691 uint64_t ua = dst.UintLeftJustified(vform, i); | |
1692 int64_t sb = src.IntLeftJustified(vform, i); | |
1693 uint64_t ur = ua + sb; | |
1694 | |
1695 if ((sb > 0) && (ur <= ua)) { | |
1696 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. | |
1697 } else if ((sb < 0) && (ur >= ua)) { | |
1698 dst.SetUint(vform, i, 0); // Negative saturation. | |
1699 } else { | |
1700 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); | |
1701 } | |
1702 } | |
1703 return dst; | |
1704 } | |
1705 | |
1706 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst, | |
1707 const LogicVRegister& src) { | |
1708 dst.ClearForWrite(vform); | |
1709 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1710 // Test for signed saturation. | |
1711 int64_t sa = src.Int(vform, i); | |
1712 if (sa == MinIntFromFormat(vform)) { | |
1713 dst.SetSignedSat(i, true); | |
1714 } | |
1715 if (sa < 0) { | |
1716 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); | |
1717 } else { | |
1718 dst.SetInt(vform, i, sa); | |
1719 } | |
1720 } | |
1721 return dst; | |
1722 } | |
1723 | |
1724 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform, | |
1725 LogicVRegister dst, bool dstIsSigned, | |
1726 const LogicVRegister& src, | |
1727 bool srcIsSigned) { | |
1728 bool upperhalf = false; | |
1729 VectorFormat srcform = kFormatUndefined; | |
1730 int64_t ssrc[8]; | |
1731 uint64_t usrc[8]; | |
1732 | |
1733 switch (dstform) { | |
1734 case kFormat8B: | |
1735 upperhalf = false; | |
1736 srcform = kFormat8H; | |
1737 break; | |
1738 case kFormat16B: | |
1739 upperhalf = true; | |
1740 srcform = kFormat8H; | |
1741 break; | |
1742 case kFormat4H: | |
1743 upperhalf = false; | |
1744 srcform = kFormat4S; | |
1745 break; | |
1746 case kFormat8H: | |
1747 upperhalf = true; | |
1748 srcform = kFormat4S; | |
1749 break; | |
1750 case kFormat2S: | |
1751 upperhalf = false; | |
1752 srcform = kFormat2D; | |
1753 break; | |
1754 case kFormat4S: | |
1755 upperhalf = true; | |
1756 srcform = kFormat2D; | |
1757 break; | |
1758 case kFormatB: | |
1759 upperhalf = false; | |
1760 srcform = kFormatH; | |
1761 break; | |
1762 case kFormatH: | |
1763 upperhalf = false; | |
1764 srcform = kFormatS; | |
1765 break; | |
1766 case kFormatS: | |
1767 upperhalf = false; | |
1768 srcform = kFormatD; | |
1769 break; | |
1770 default: | |
1771 UNIMPLEMENTED(); | |
1772 } | |
1773 | |
1774 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { | |
1775 ssrc[i] = src.Int(srcform, i); | |
1776 usrc[i] = src.Uint(srcform, i); | |
1777 } | |
1778 | |
1779 int offset; | |
1780 if (upperhalf) { | |
1781 offset = LaneCountFromFormat(dstform) / 2; | |
1782 } else { | |
1783 offset = 0; | |
1784 dst.ClearForWrite(dstform); | |
1785 } | |
1786 | |
1787 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { | |
1788 // Test for signed saturation | |
1789 if (ssrc[i] > MaxIntFromFormat(dstform)) { | |
1790 dst.SetSignedSat(offset + i, true); | |
1791 } else if (ssrc[i] < MinIntFromFormat(dstform)) { | |
1792 dst.SetSignedSat(offset + i, false); | |
1793 } | |
1794 | |
1795 // Test for unsigned saturation | |
1796 if (srcIsSigned) { | |
1797 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { | |
1798 dst.SetUnsignedSat(offset + i, true); | |
1799 } else if (ssrc[i] < 0) { | |
1800 dst.SetUnsignedSat(offset + i, false); | |
1801 } | |
1802 } else { | |
1803 if (usrc[i] > MaxUintFromFormat(dstform)) { | |
1804 dst.SetUnsignedSat(offset + i, true); | |
1805 } | |
1806 } | |
1807 | |
1808 int64_t result; | |
1809 if (srcIsSigned) { | |
1810 result = ssrc[i] & MaxUintFromFormat(dstform); | |
1811 } else { | |
1812 result = usrc[i] & MaxUintFromFormat(dstform); | |
1813 } | |
1814 | |
1815 if (dstIsSigned) { | |
1816 dst.SetInt(dstform, offset + i, result); | |
1817 } else { | |
1818 dst.SetUint(dstform, offset + i, result); | |
1819 } | |
1820 } | |
1821 return dst; | |
1822 } | |
1823 | |
1824 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst, | |
1825 const LogicVRegister& src) { | |
1826 return ExtractNarrow(vform, dst, true, src, true); | |
1827 } | |
1828 | |
1829 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst, | |
1830 const LogicVRegister& src) { | |
1831 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform); | |
1832 } | |
1833 | |
1834 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst, | |
1835 const LogicVRegister& src) { | |
1836 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform); | |
1837 } | |
1838 | |
1839 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst, | |
1840 const LogicVRegister& src) { | |
1841 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform); | |
1842 } | |
1843 | |
1844 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst, | |
1845 const LogicVRegister& src1, | |
1846 const LogicVRegister& src2, bool issigned) { | |
1847 dst.ClearForWrite(vform); | |
1848 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1849 if (issigned) { | |
1850 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); | |
1851 sr = sr > 0 ? sr : -sr; | |
1852 dst.SetInt(vform, i, sr); | |
1853 } else { | |
1854 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); | |
1855 sr = sr > 0 ? sr : -sr; | |
1856 dst.SetUint(vform, i, sr); | |
1857 } | |
1858 } | |
1859 return dst; | |
1860 } | |
1861 | |
1862 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst, | |
1863 const LogicVRegister& src1, | |
1864 const LogicVRegister& src2) { | |
1865 SimVRegister temp; | |
1866 dst.ClearForWrite(vform); | |
1867 AbsDiff(vform, temp, src1, src2, true); | |
1868 add(vform, dst, dst, temp); | |
1869 return dst; | |
1870 } | |
1871 | |
1872 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst, | |
1873 const LogicVRegister& src1, | |
1874 const LogicVRegister& src2) { | |
1875 SimVRegister temp; | |
1876 dst.ClearForWrite(vform); | |
1877 AbsDiff(vform, temp, src1, src2, false); | |
1878 add(vform, dst, dst, temp); | |
1879 return dst; | |
1880 } | |
1881 | |
1882 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst, | |
1883 const LogicVRegister& src) { | |
1884 dst.ClearForWrite(vform); | |
1885 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
1886 dst.SetUint(vform, i, ~src.Uint(vform, i)); | |
1887 } | |
1888 return dst; | |
1889 } | |
1890 | |
1891 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, | |
1892 const LogicVRegister& src) { | |
1893 uint64_t result[16]; | |
1894 int laneCount = LaneCountFromFormat(vform); | |
1895 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); | |
1896 uint64_t reversed_value; | |
1897 uint64_t value; | |
1898 for (int i = 0; i < laneCount; i++) { | |
1899 value = src.Uint(vform, i); | |
1900 reversed_value = 0; | |
1901 for (int j = 0; j < laneSizeInBits; j++) { | |
1902 reversed_value = (reversed_value << 1) | (value & 1); | |
1903 value >>= 1; | |
1904 } | |
1905 result[i] = reversed_value; | |
1906 } | |
1907 | |
1908 dst.SetUintArray(vform, result); | |
1909 return dst; | |
1910 } | |
1911 | |
1912 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, | |
1913 const LogicVRegister& src, int revSize) { | |
1914 uint64_t result[16]; | |
1915 int laneCount = LaneCountFromFormat(vform); | |
1916 int laneSize = LaneSizeInBytesFromFormat(vform); | |
1917 int lanesPerLoop = revSize / laneSize; | |
1918 for (int i = 0; i < laneCount; i += lanesPerLoop) { | |
1919 for (int j = 0; j < lanesPerLoop; j++) { | |
1920 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); | |
1921 } | |
1922 } | |
1923 dst.SetUintArray(vform, result); | |
1924 return dst; | |
1925 } | |
1926 | |
1927 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, | |
1928 const LogicVRegister& src) { | |
1929 return rev(vform, dst, src, 2); | |
1930 } | |
1931 | |
1932 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, | |
1933 const LogicVRegister& src) { | |
1934 return rev(vform, dst, src, 4); | |
1935 } | |
1936 | |
1937 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, | |
1938 const LogicVRegister& src) { | |
1939 return rev(vform, dst, src, 8); | |
1940 } | |
1941 | |
1942 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst, | |
1943 const LogicVRegister& src, bool is_signed, | |
1944 bool do_accumulate) { | |
1945 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); | |
1946 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U); | |
1947 DCHECK_LE(LaneCountFromFormat(vform), 8); | |
1948 | |
1949 uint64_t result[8]; | |
1950 int lane_count = LaneCountFromFormat(vform); | |
1951 for (int i = 0; i < lane_count; i++) { | |
1952 if (is_signed) { | |
1953 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + | |
1954 src.Int(vformsrc, 2 * i + 1)); | |
1955 } else { | |
1956 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); | |
1957 } | |
1958 } | |
1959 | |
1960 dst.ClearForWrite(vform); | |
1961 for (int i = 0; i < lane_count; ++i) { | |
1962 if (do_accumulate) { | |
1963 result[i] += dst.Uint(vform, i); | |
1964 } | |
1965 dst.SetUint(vform, i, result[i]); | |
1966 } | |
1967 | |
1968 return dst; | |
1969 } | |
1970 | |
1971 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst, | |
1972 const LogicVRegister& src) { | |
1973 return addlp(vform, dst, src, true, false); | |
1974 } | |
1975 | |
1976 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst, | |
1977 const LogicVRegister& src) { | |
1978 return addlp(vform, dst, src, false, false); | |
1979 } | |
1980 | |
1981 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst, | |
1982 const LogicVRegister& src) { | |
1983 return addlp(vform, dst, src, true, true); | |
1984 } | |
1985 | |
1986 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst, | |
1987 const LogicVRegister& src) { | |
1988 return addlp(vform, dst, src, false, true); | |
1989 } | |
1990 | |
1991 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst, | |
1992 const LogicVRegister& src1, | |
1993 const LogicVRegister& src2, int index) { | |
1994 uint8_t result[16]; | |
1995 int laneCount = LaneCountFromFormat(vform); | |
1996 for (int i = 0; i < laneCount - index; ++i) { | |
1997 result[i] = src1.Uint(vform, i + index); | |
1998 } | |
1999 for (int i = 0; i < index; ++i) { | |
2000 result[laneCount - index + i] = src2.Uint(vform, i); | |
2001 } | |
2002 dst.ClearForWrite(vform); | |
2003 for (int i = 0; i < laneCount; ++i) { | |
2004 dst.SetUint(vform, i, result[i]); | |
2005 } | |
2006 return dst; | |
2007 } | |
2008 | |
2009 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, | |
2010 const LogicVRegister& src, | |
2011 int src_index) { | |
2012 int laneCount = LaneCountFromFormat(vform); | |
2013 uint64_t value = src.Uint(vform, src_index); | |
2014 dst.ClearForWrite(vform); | |
2015 for (int i = 0; i < laneCount; ++i) { | |
2016 dst.SetUint(vform, i, value); | |
2017 } | |
2018 return dst; | |
2019 } | |
2020 | |
2021 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst, | |
2022 uint64_t imm) { | |
2023 int laneCount = LaneCountFromFormat(vform); | |
2024 uint64_t value = imm & MaxUintFromFormat(vform); | |
2025 dst.ClearForWrite(vform); | |
2026 for (int i = 0; i < laneCount; ++i) { | |
2027 dst.SetUint(vform, i, value); | |
2028 } | |
2029 return dst; | |
2030 } | |
2031 | |
2032 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst, | |
2033 int dst_index, const LogicVRegister& src, | |
2034 int src_index) { | |
2035 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); | |
2036 return dst; | |
2037 } | |
2038 | |
2039 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst, | |
2040 int dst_index, uint64_t imm) { | |
2041 uint64_t value = imm & MaxUintFromFormat(vform); | |
2042 dst.SetUint(vform, dst_index, value); | |
2043 return dst; | |
2044 } | |
2045 | |
2046 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, | |
2047 uint64_t imm) { | |
2048 int laneCount = LaneCountFromFormat(vform); | |
2049 dst.ClearForWrite(vform); | |
2050 for (int i = 0; i < laneCount; ++i) { | |
2051 dst.SetUint(vform, i, imm); | |
2052 } | |
2053 return dst; | |
2054 } | |
2055 | |
2056 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, | |
2057 uint64_t imm) { | |
2058 int laneCount = LaneCountFromFormat(vform); | |
2059 dst.ClearForWrite(vform); | |
2060 for (int i = 0; i < laneCount; ++i) { | |
2061 dst.SetUint(vform, i, ~imm); | |
2062 } | |
2063 return dst; | |
2064 } | |
2065 | |
2066 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, | |
2067 const LogicVRegister& src, uint64_t imm) { | |
2068 uint64_t result[16]; | |
2069 int laneCount = LaneCountFromFormat(vform); | |
2070 for (int i = 0; i < laneCount; ++i) { | |
2071 result[i] = src.Uint(vform, i) | imm; | |
2072 } | |
2073 dst.SetUintArray(vform, result); | |
2074 return dst; | |
2075 } | |
2076 | |
2077 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst, | |
2078 const LogicVRegister& src) { | |
2079 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2080 | |
2081 dst.ClearForWrite(vform); | |
2082 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2083 dst.SetUint(vform, i, src.Uint(vform_half, i)); | |
2084 } | |
2085 return dst; | |
2086 } | |
2087 | |
2088 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst, | |
2089 const LogicVRegister& src) { | |
2090 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2091 | |
2092 dst.ClearForWrite(vform); | |
2093 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2094 dst.SetInt(vform, i, src.Int(vform_half, i)); | |
2095 } | |
2096 return dst; | |
2097 } | |
2098 | |
2099 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst, | |
2100 const LogicVRegister& src) { | |
2101 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2102 int lane_count = LaneCountFromFormat(vform); | |
2103 | |
2104 dst.ClearForWrite(vform); | |
2105 for (int i = 0; i < lane_count; i++) { | |
2106 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); | |
2107 } | |
2108 return dst; | |
2109 } | |
2110 | |
2111 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst, | |
2112 const LogicVRegister& src) { | |
2113 VectorFormat vform_half = VectorFormatHalfWidth(vform); | |
2114 int lane_count = LaneCountFromFormat(vform); | |
2115 | |
2116 dst.ClearForWrite(vform); | |
2117 for (int i = 0; i < lane_count; i++) { | |
2118 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); | |
2119 } | |
2120 return dst; | |
2121 } | |
2122 | |
2123 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, | |
2124 const LogicVRegister& src, int shift) { | |
2125 SimVRegister temp; | |
2126 VectorFormat vform_src = VectorFormatDoubleWidth(vform); | |
2127 VectorFormat vform_dst = vform; | |
2128 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); | |
2129 return ExtractNarrow(vform_dst, dst, false, shifted_src, false); | |
2130 } | |
2131 | |
2132 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst, | |
2133 const LogicVRegister& src, int shift) { | |
2134 SimVRegister temp; | |
2135 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2136 VectorFormat vformdst = vform; | |
2137 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); | |
2138 return ExtractNarrow(vformdst, dst, false, shifted_src, false); | |
2139 } | |
2140 | |
2141 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst, | |
2142 const LogicVRegister& src, int shift) { | |
2143 SimVRegister temp; | |
2144 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2145 VectorFormat vformdst = vform; | |
2146 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); | |
2147 return ExtractNarrow(vformdst, dst, false, shifted_src, false); | |
2148 } | |
2149 | |
2150 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst, | |
2151 const LogicVRegister& src, int shift) { | |
2152 SimVRegister temp; | |
2153 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2154 VectorFormat vformdst = vform; | |
2155 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); | |
2156 return ExtractNarrow(vformdst, dst, false, shifted_src, false); | |
2157 } | |
2158 | |
2159 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, | |
2160 const LogicVRegister& ind, | |
2161 bool zero_out_of_bounds, | |
2162 const LogicVRegister* tab1, | |
2163 const LogicVRegister* tab2, | |
2164 const LogicVRegister* tab3, | |
2165 const LogicVRegister* tab4) { | |
2166 DCHECK_NOT_NULL(tab1); | |
2167 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; | |
2168 uint64_t result[kMaxLanesPerVector]; | |
2169 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2170 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i); | |
2171 } | |
2172 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2173 uint64_t j = ind.Uint(vform, i); | |
2174 int tab_idx = static_cast<int>(j >> 4); | |
2175 int j_idx = static_cast<int>(j & 15); | |
2176 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { | |
2177 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); | |
2178 } | |
2179 } | |
2180 dst.SetUintArray(vform, result); | |
2181 return dst; | |
2182 } | |
2183 | |
2184 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2185 const LogicVRegister& tab, | |
2186 const LogicVRegister& ind) { | |
2187 return Table(vform, dst, ind, true, &tab); | |
2188 } | |
2189 | |
2190 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2191 const LogicVRegister& tab, | |
2192 const LogicVRegister& tab2, | |
2193 const LogicVRegister& ind) { | |
2194 return Table(vform, dst, ind, true, &tab, &tab2); | |
2195 } | |
2196 | |
2197 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2198 const LogicVRegister& tab, | |
2199 const LogicVRegister& tab2, | |
2200 const LogicVRegister& tab3, | |
2201 const LogicVRegister& ind) { | |
2202 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); | |
2203 } | |
2204 | |
2205 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, | |
2206 const LogicVRegister& tab, | |
2207 const LogicVRegister& tab2, | |
2208 const LogicVRegister& tab3, | |
2209 const LogicVRegister& tab4, | |
2210 const LogicVRegister& ind) { | |
2211 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); | |
2212 } | |
2213 | |
2214 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2215 const LogicVRegister& tab, | |
2216 const LogicVRegister& ind) { | |
2217 return Table(vform, dst, ind, false, &tab); | |
2218 } | |
2219 | |
2220 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2221 const LogicVRegister& tab, | |
2222 const LogicVRegister& tab2, | |
2223 const LogicVRegister& ind) { | |
2224 return Table(vform, dst, ind, false, &tab, &tab2); | |
2225 } | |
2226 | |
2227 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2228 const LogicVRegister& tab, | |
2229 const LogicVRegister& tab2, | |
2230 const LogicVRegister& tab3, | |
2231 const LogicVRegister& ind) { | |
2232 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); | |
2233 } | |
2234 | |
2235 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, | |
2236 const LogicVRegister& tab, | |
2237 const LogicVRegister& tab2, | |
2238 const LogicVRegister& tab3, | |
2239 const LogicVRegister& tab4, | |
2240 const LogicVRegister& ind) { | |
2241 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); | |
2242 } | |
2243 | |
2244 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst, | |
2245 const LogicVRegister& src, int shift) { | |
2246 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); | |
2247 } | |
2248 | |
2249 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst, | |
2250 const LogicVRegister& src, int shift) { | |
2251 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); | |
2252 } | |
2253 | |
2254 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst, | |
2255 const LogicVRegister& src, int shift) { | |
2256 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); | |
2257 } | |
2258 | |
2259 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst, | |
2260 const LogicVRegister& src, int shift) { | |
2261 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); | |
2262 } | |
2263 | |
2264 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst, | |
2265 const LogicVRegister& src, int shift) { | |
2266 SimVRegister temp; | |
2267 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2268 VectorFormat vformdst = vform; | |
2269 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2270 return sqxtn(vformdst, dst, shifted_src); | |
2271 } | |
2272 | |
2273 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst, | |
2274 const LogicVRegister& src, int shift) { | |
2275 SimVRegister temp; | |
2276 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2277 VectorFormat vformdst = vform; | |
2278 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2279 return sqxtn(vformdst, dst, shifted_src); | |
2280 } | |
2281 | |
2282 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst, | |
2283 const LogicVRegister& src, int shift) { | |
2284 SimVRegister temp; | |
2285 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2286 VectorFormat vformdst = vform; | |
2287 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2288 return sqxtn(vformdst, dst, shifted_src); | |
2289 } | |
2290 | |
2291 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst, | |
2292 const LogicVRegister& src, int shift) { | |
2293 SimVRegister temp; | |
2294 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2295 VectorFormat vformdst = vform; | |
2296 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2297 return sqxtn(vformdst, dst, shifted_src); | |
2298 } | |
2299 | |
2300 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst, | |
2301 const LogicVRegister& src, int shift) { | |
2302 SimVRegister temp; | |
2303 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2304 VectorFormat vformdst = vform; | |
2305 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2306 return sqxtun(vformdst, dst, shifted_src); | |
2307 } | |
2308 | |
2309 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst, | |
2310 const LogicVRegister& src, int shift) { | |
2311 SimVRegister temp; | |
2312 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2313 VectorFormat vformdst = vform; | |
2314 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); | |
2315 return sqxtun(vformdst, dst, shifted_src); | |
2316 } | |
2317 | |
2318 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst, | |
2319 const LogicVRegister& src, int shift) { | |
2320 SimVRegister temp; | |
2321 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); | |
2322 VectorFormat vformdst = vform; | |
2323 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2324 return sqxtun(vformdst, dst, shifted_src); | |
2325 } | |
2326 | |
2327 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst, | |
2328 const LogicVRegister& src, int shift) { | |
2329 SimVRegister temp; | |
2330 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); | |
2331 VectorFormat vformdst = vform; | |
2332 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); | |
2333 return sqxtun(vformdst, dst, shifted_src); | |
2334 } | |
2335 | |
2336 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst, | |
2337 const LogicVRegister& src1, | |
2338 const LogicVRegister& src2) { | |
2339 SimVRegister temp1, temp2; | |
2340 uxtl(vform, temp1, src1); | |
2341 uxtl(vform, temp2, src2); | |
2342 add(vform, dst, temp1, temp2); | |
2343 return dst; | |
2344 } | |
2345 | |
2346 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst, | |
2347 const LogicVRegister& src1, | |
2348 const LogicVRegister& src2) { | |
2349 SimVRegister temp1, temp2; | |
2350 uxtl2(vform, temp1, src1); | |
2351 uxtl2(vform, temp2, src2); | |
2352 add(vform, dst, temp1, temp2); | |
2353 return dst; | |
2354 } | |
2355 | |
2356 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst, | |
2357 const LogicVRegister& src1, | |
2358 const LogicVRegister& src2) { | |
2359 SimVRegister temp; | |
2360 uxtl(vform, temp, src2); | |
2361 add(vform, dst, src1, temp); | |
2362 return dst; | |
2363 } | |
2364 | |
2365 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst, | |
2366 const LogicVRegister& src1, | |
2367 const LogicVRegister& src2) { | |
2368 SimVRegister temp; | |
2369 uxtl2(vform, temp, src2); | |
2370 add(vform, dst, src1, temp); | |
2371 return dst; | |
2372 } | |
2373 | |
2374 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst, | |
2375 const LogicVRegister& src1, | |
2376 const LogicVRegister& src2) { | |
2377 SimVRegister temp1, temp2; | |
2378 sxtl(vform, temp1, src1); | |
2379 sxtl(vform, temp2, src2); | |
2380 add(vform, dst, temp1, temp2); | |
2381 return dst; | |
2382 } | |
2383 | |
2384 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst, | |
2385 const LogicVRegister& src1, | |
2386 const LogicVRegister& src2) { | |
2387 SimVRegister temp1, temp2; | |
2388 sxtl2(vform, temp1, src1); | |
2389 sxtl2(vform, temp2, src2); | |
2390 add(vform, dst, temp1, temp2); | |
2391 return dst; | |
2392 } | |
2393 | |
2394 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst, | |
2395 const LogicVRegister& src1, | |
2396 const LogicVRegister& src2) { | |
2397 SimVRegister temp; | |
2398 sxtl(vform, temp, src2); | |
2399 add(vform, dst, src1, temp); | |
2400 return dst; | |
2401 } | |
2402 | |
2403 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst, | |
2404 const LogicVRegister& src1, | |
2405 const LogicVRegister& src2) { | |
2406 SimVRegister temp; | |
2407 sxtl2(vform, temp, src2); | |
2408 add(vform, dst, src1, temp); | |
2409 return dst; | |
2410 } | |
2411 | |
2412 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst, | |
2413 const LogicVRegister& src1, | |
2414 const LogicVRegister& src2) { | |
2415 SimVRegister temp1, temp2; | |
2416 uxtl(vform, temp1, src1); | |
2417 uxtl(vform, temp2, src2); | |
2418 sub(vform, dst, temp1, temp2); | |
2419 return dst; | |
2420 } | |
2421 | |
2422 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst, | |
2423 const LogicVRegister& src1, | |
2424 const LogicVRegister& src2) { | |
2425 SimVRegister temp1, temp2; | |
2426 uxtl2(vform, temp1, src1); | |
2427 uxtl2(vform, temp2, src2); | |
2428 sub(vform, dst, temp1, temp2); | |
2429 return dst; | |
2430 } | |
2431 | |
2432 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst, | |
2433 const LogicVRegister& src1, | |
2434 const LogicVRegister& src2) { | |
2435 SimVRegister temp; | |
2436 uxtl(vform, temp, src2); | |
2437 sub(vform, dst, src1, temp); | |
2438 return dst; | |
2439 } | |
2440 | |
2441 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst, | |
2442 const LogicVRegister& src1, | |
2443 const LogicVRegister& src2) { | |
2444 SimVRegister temp; | |
2445 uxtl2(vform, temp, src2); | |
2446 sub(vform, dst, src1, temp); | |
2447 return dst; | |
2448 } | |
2449 | |
2450 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst, | |
2451 const LogicVRegister& src1, | |
2452 const LogicVRegister& src2) { | |
2453 SimVRegister temp1, temp2; | |
2454 sxtl(vform, temp1, src1); | |
2455 sxtl(vform, temp2, src2); | |
2456 sub(vform, dst, temp1, temp2); | |
2457 return dst; | |
2458 } | |
2459 | |
2460 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst, | |
2461 const LogicVRegister& src1, | |
2462 const LogicVRegister& src2) { | |
2463 SimVRegister temp1, temp2; | |
2464 sxtl2(vform, temp1, src1); | |
2465 sxtl2(vform, temp2, src2); | |
2466 sub(vform, dst, temp1, temp2); | |
2467 return dst; | |
2468 } | |
2469 | |
2470 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst, | |
2471 const LogicVRegister& src1, | |
2472 const LogicVRegister& src2) { | |
2473 SimVRegister temp; | |
2474 sxtl(vform, temp, src2); | |
2475 sub(vform, dst, src1, temp); | |
2476 return dst; | |
2477 } | |
2478 | |
2479 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst, | |
2480 const LogicVRegister& src1, | |
2481 const LogicVRegister& src2) { | |
2482 SimVRegister temp; | |
2483 sxtl2(vform, temp, src2); | |
2484 sub(vform, dst, src1, temp); | |
2485 return dst; | |
2486 } | |
2487 | |
2488 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst, | |
2489 const LogicVRegister& src1, | |
2490 const LogicVRegister& src2) { | |
2491 SimVRegister temp1, temp2; | |
2492 uxtl(vform, temp1, src1); | |
2493 uxtl(vform, temp2, src2); | |
2494 uaba(vform, dst, temp1, temp2); | |
2495 return dst; | |
2496 } | |
2497 | |
2498 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst, | |
2499 const LogicVRegister& src1, | |
2500 const LogicVRegister& src2) { | |
2501 SimVRegister temp1, temp2; | |
2502 uxtl2(vform, temp1, src1); | |
2503 uxtl2(vform, temp2, src2); | |
2504 uaba(vform, dst, temp1, temp2); | |
2505 return dst; | |
2506 } | |
2507 | |
2508 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst, | |
2509 const LogicVRegister& src1, | |
2510 const LogicVRegister& src2) { | |
2511 SimVRegister temp1, temp2; | |
2512 sxtl(vform, temp1, src1); | |
2513 sxtl(vform, temp2, src2); | |
2514 saba(vform, dst, temp1, temp2); | |
2515 return dst; | |
2516 } | |
2517 | |
2518 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst, | |
2519 const LogicVRegister& src1, | |
2520 const LogicVRegister& src2) { | |
2521 SimVRegister temp1, temp2; | |
2522 sxtl2(vform, temp1, src1); | |
2523 sxtl2(vform, temp2, src2); | |
2524 saba(vform, dst, temp1, temp2); | |
2525 return dst; | |
2526 } | |
2527 | |
2528 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst, | |
2529 const LogicVRegister& src1, | |
2530 const LogicVRegister& src2) { | |
2531 SimVRegister temp1, temp2; | |
2532 uxtl(vform, temp1, src1); | |
2533 uxtl(vform, temp2, src2); | |
2534 AbsDiff(vform, dst, temp1, temp2, false); | |
2535 return dst; | |
2536 } | |
2537 | |
2538 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst, | |
2539 const LogicVRegister& src1, | |
2540 const LogicVRegister& src2) { | |
2541 SimVRegister temp1, temp2; | |
2542 uxtl2(vform, temp1, src1); | |
2543 uxtl2(vform, temp2, src2); | |
2544 AbsDiff(vform, dst, temp1, temp2, false); | |
2545 return dst; | |
2546 } | |
2547 | |
2548 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst, | |
2549 const LogicVRegister& src1, | |
2550 const LogicVRegister& src2) { | |
2551 SimVRegister temp1, temp2; | |
2552 sxtl(vform, temp1, src1); | |
2553 sxtl(vform, temp2, src2); | |
2554 AbsDiff(vform, dst, temp1, temp2, true); | |
2555 return dst; | |
2556 } | |
2557 | |
2558 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst, | |
2559 const LogicVRegister& src1, | |
2560 const LogicVRegister& src2) { | |
2561 SimVRegister temp1, temp2; | |
2562 sxtl2(vform, temp1, src1); | |
2563 sxtl2(vform, temp2, src2); | |
2564 AbsDiff(vform, dst, temp1, temp2, true); | |
2565 return dst; | |
2566 } | |
2567 | |
2568 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, | |
2569 const LogicVRegister& src1, | |
2570 const LogicVRegister& src2) { | |
2571 SimVRegister temp1, temp2; | |
2572 uxtl(vform, temp1, src1); | |
2573 uxtl(vform, temp2, src2); | |
2574 mul(vform, dst, temp1, temp2); | |
2575 return dst; | |
2576 } | |
2577 | |
2578 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, | |
2579 const LogicVRegister& src1, | |
2580 const LogicVRegister& src2) { | |
2581 SimVRegister temp1, temp2; | |
2582 uxtl2(vform, temp1, src1); | |
2583 uxtl2(vform, temp2, src2); | |
2584 mul(vform, dst, temp1, temp2); | |
2585 return dst; | |
2586 } | |
2587 | |
2588 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, | |
2589 const LogicVRegister& src1, | |
2590 const LogicVRegister& src2) { | |
2591 SimVRegister temp1, temp2; | |
2592 sxtl(vform, temp1, src1); | |
2593 sxtl(vform, temp2, src2); | |
2594 mul(vform, dst, temp1, temp2); | |
2595 return dst; | |
2596 } | |
2597 | |
2598 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, | |
2599 const LogicVRegister& src1, | |
2600 const LogicVRegister& src2) { | |
2601 SimVRegister temp1, temp2; | |
2602 sxtl2(vform, temp1, src1); | |
2603 sxtl2(vform, temp2, src2); | |
2604 mul(vform, dst, temp1, temp2); | |
2605 return dst; | |
2606 } | |
2607 | |
2608 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, | |
2609 const LogicVRegister& src1, | |
2610 const LogicVRegister& src2) { | |
2611 SimVRegister temp1, temp2; | |
2612 uxtl(vform, temp1, src1); | |
2613 uxtl(vform, temp2, src2); | |
2614 mls(vform, dst, temp1, temp2); | |
2615 return dst; | |
2616 } | |
2617 | |
2618 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, | |
2619 const LogicVRegister& src1, | |
2620 const LogicVRegister& src2) { | |
2621 SimVRegister temp1, temp2; | |
2622 uxtl2(vform, temp1, src1); | |
2623 uxtl2(vform, temp2, src2); | |
2624 mls(vform, dst, temp1, temp2); | |
2625 return dst; | |
2626 } | |
2627 | |
2628 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, | |
2629 const LogicVRegister& src1, | |
2630 const LogicVRegister& src2) { | |
2631 SimVRegister temp1, temp2; | |
2632 sxtl(vform, temp1, src1); | |
2633 sxtl(vform, temp2, src2); | |
2634 mls(vform, dst, temp1, temp2); | |
2635 return dst; | |
2636 } | |
2637 | |
2638 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, | |
2639 const LogicVRegister& src1, | |
2640 const LogicVRegister& src2) { | |
2641 SimVRegister temp1, temp2; | |
2642 sxtl2(vform, temp1, src1); | |
2643 sxtl2(vform, temp2, src2); | |
2644 mls(vform, dst, temp1, temp2); | |
2645 return dst; | |
2646 } | |
2647 | |
2648 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, | |
2649 const LogicVRegister& src1, | |
2650 const LogicVRegister& src2) { | |
2651 SimVRegister temp1, temp2; | |
2652 uxtl(vform, temp1, src1); | |
2653 uxtl(vform, temp2, src2); | |
2654 mla(vform, dst, temp1, temp2); | |
2655 return dst; | |
2656 } | |
2657 | |
2658 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, | |
2659 const LogicVRegister& src1, | |
2660 const LogicVRegister& src2) { | |
2661 SimVRegister temp1, temp2; | |
2662 uxtl2(vform, temp1, src1); | |
2663 uxtl2(vform, temp2, src2); | |
2664 mla(vform, dst, temp1, temp2); | |
2665 return dst; | |
2666 } | |
2667 | |
2668 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, | |
2669 const LogicVRegister& src1, | |
2670 const LogicVRegister& src2) { | |
2671 SimVRegister temp1, temp2; | |
2672 sxtl(vform, temp1, src1); | |
2673 sxtl(vform, temp2, src2); | |
2674 mla(vform, dst, temp1, temp2); | |
2675 return dst; | |
2676 } | |
2677 | |
2678 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, | |
2679 const LogicVRegister& src1, | |
2680 const LogicVRegister& src2) { | |
2681 SimVRegister temp1, temp2; | |
2682 sxtl2(vform, temp1, src1); | |
2683 sxtl2(vform, temp2, src2); | |
2684 mla(vform, dst, temp1, temp2); | |
2685 return dst; | |
2686 } | |
2687 | |
2688 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, | |
2689 const LogicVRegister& src1, | |
2690 const LogicVRegister& src2) { | |
2691 SimVRegister temp; | |
2692 LogicVRegister product = sqdmull(vform, temp, src1, src2); | |
2693 return add(vform, dst, dst, product).SignedSaturate(vform); | |
2694 } | |
2695 | |
2696 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, | |
2697 const LogicVRegister& src1, | |
2698 const LogicVRegister& src2) { | |
2699 SimVRegister temp; | |
2700 LogicVRegister product = sqdmull2(vform, temp, src1, src2); | |
2701 return add(vform, dst, dst, product).SignedSaturate(vform); | |
2702 } | |
2703 | |
2704 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, | |
2705 const LogicVRegister& src1, | |
2706 const LogicVRegister& src2) { | |
2707 SimVRegister temp; | |
2708 LogicVRegister product = sqdmull(vform, temp, src1, src2); | |
2709 return sub(vform, dst, dst, product).SignedSaturate(vform); | |
2710 } | |
2711 | |
2712 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, | |
2713 const LogicVRegister& src1, | |
2714 const LogicVRegister& src2) { | |
2715 SimVRegister temp; | |
2716 LogicVRegister product = sqdmull2(vform, temp, src1, src2); | |
2717 return sub(vform, dst, dst, product).SignedSaturate(vform); | |
2718 } | |
2719 | |
2720 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, | |
2721 const LogicVRegister& src1, | |
2722 const LogicVRegister& src2) { | |
2723 SimVRegister temp; | |
2724 LogicVRegister product = smull(vform, temp, src1, src2); | |
2725 return add(vform, dst, product, product).SignedSaturate(vform); | |
2726 } | |
2727 | |
2728 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, | |
2729 const LogicVRegister& src1, | |
2730 const LogicVRegister& src2) { | |
2731 SimVRegister temp; | |
2732 LogicVRegister product = smull2(vform, temp, src1, src2); | |
2733 return add(vform, dst, product, product).SignedSaturate(vform); | |
2734 } | |
2735 | |
2736 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, | |
2737 const LogicVRegister& src1, | |
2738 const LogicVRegister& src2, bool round) { | |
2739 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. | |
2740 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) | |
2741 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. | |
2742 | |
2743 int esize = LaneSizeInBitsFromFormat(vform); | |
2744 int round_const = round ? (1 << (esize - 2)) : 0; | |
2745 int64_t product; | |
2746 | |
2747 dst.ClearForWrite(vform); | |
2748 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
2749 product = src1.Int(vform, i) * src2.Int(vform, i); | |
2750 product += round_const; | |
2751 product = product >> (esize - 1); | |
2752 | |
2753 if (product > MaxIntFromFormat(vform)) { | |
2754 product = MaxIntFromFormat(vform); | |
2755 } else if (product < MinIntFromFormat(vform)) { | |
2756 product = MinIntFromFormat(vform); | |
2757 } | |
2758 dst.SetInt(vform, i, product); | |
2759 } | |
2760 return dst; | |
2761 } | |
2762 | |
2763 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, | |
2764 const LogicVRegister& src1, | |
2765 const LogicVRegister& src2) { | |
2766 return sqrdmulh(vform, dst, src1, src2, false); | |
2767 } | |
2768 | |
2769 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst, | |
2770 const LogicVRegister& src1, | |
2771 const LogicVRegister& src2) { | |
2772 SimVRegister temp; | |
2773 add(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2774 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2775 return dst; | |
2776 } | |
2777 | |
2778 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst, | |
2779 const LogicVRegister& src1, | |
2780 const LogicVRegister& src2) { | |
2781 SimVRegister temp; | |
2782 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2783 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2784 return dst; | |
2785 } | |
2786 | |
2787 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst, | |
2788 const LogicVRegister& src1, | |
2789 const LogicVRegister& src2) { | |
2790 SimVRegister temp; | |
2791 add(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2792 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2793 return dst; | |
2794 } | |
2795 | |
2796 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst, | |
2797 const LogicVRegister& src1, | |
2798 const LogicVRegister& src2) { | |
2799 SimVRegister temp; | |
2800 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2801 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2802 return dst; | |
2803 } | |
2804 | |
2805 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst, | |
2806 const LogicVRegister& src1, | |
2807 const LogicVRegister& src2) { | |
2808 SimVRegister temp; | |
2809 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2810 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2811 return dst; | |
2812 } | |
2813 | |
2814 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst, | |
2815 const LogicVRegister& src1, | |
2816 const LogicVRegister& src2) { | |
2817 SimVRegister temp; | |
2818 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2819 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2820 return dst; | |
2821 } | |
2822 | |
2823 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst, | |
2824 const LogicVRegister& src1, | |
2825 const LogicVRegister& src2) { | |
2826 SimVRegister temp; | |
2827 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); | |
2828 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2829 return dst; | |
2830 } | |
2831 | |
2832 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst, | |
2833 const LogicVRegister& src1, | |
2834 const LogicVRegister& src2) { | |
2835 SimVRegister temp; | |
2836 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); | |
2837 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); | |
2838 return dst; | |
2839 } | |
2840 | |
2841 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, | |
2842 const LogicVRegister& src1, | |
2843 const LogicVRegister& src2) { | |
2844 uint64_t result[16]; | |
2845 int laneCount = LaneCountFromFormat(vform); | |
2846 int pairs = laneCount / 2; | |
2847 for (int i = 0; i < pairs; ++i) { | |
2848 result[2 * i] = src1.Uint(vform, 2 * i); | |
2849 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); | |
2850 } | |
2851 | |
2852 dst.SetUintArray(vform, result); | |
2853 return dst; | |
2854 } | |
2855 | |
2856 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, | |
2857 const LogicVRegister& src1, | |
2858 const LogicVRegister& src2) { | |
2859 uint64_t result[16]; | |
2860 int laneCount = LaneCountFromFormat(vform); | |
2861 int pairs = laneCount / 2; | |
2862 for (int i = 0; i < pairs; ++i) { | |
2863 result[2 * i] = src1.Uint(vform, (2 * i) + 1); | |
2864 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); | |
2865 } | |
2866 | |
2867 dst.SetUintArray(vform, result); | |
2868 return dst; | |
2869 } | |
2870 | |
2871 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, | |
2872 const LogicVRegister& src1, | |
2873 const LogicVRegister& src2) { | |
2874 uint64_t result[16]; | |
2875 int laneCount = LaneCountFromFormat(vform); | |
2876 int pairs = laneCount / 2; | |
2877 for (int i = 0; i < pairs; ++i) { | |
2878 result[2 * i] = src1.Uint(vform, i); | |
2879 result[(2 * i) + 1] = src2.Uint(vform, i); | |
2880 } | |
2881 | |
2882 dst.SetUintArray(vform, result); | |
2883 return dst; | |
2884 } | |
2885 | |
2886 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, | |
2887 const LogicVRegister& src1, | |
2888 const LogicVRegister& src2) { | |
2889 uint64_t result[16]; | |
2890 int laneCount = LaneCountFromFormat(vform); | |
2891 int pairs = laneCount / 2; | |
2892 for (int i = 0; i < pairs; ++i) { | |
2893 result[2 * i] = src1.Uint(vform, pairs + i); | |
2894 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); | |
2895 } | |
2896 | |
2897 dst.SetUintArray(vform, result); | |
2898 return dst; | |
2899 } | |
2900 | |
2901 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, | |
2902 const LogicVRegister& src1, | |
2903 const LogicVRegister& src2) { | |
2904 uint64_t result[32]; | |
2905 int laneCount = LaneCountFromFormat(vform); | |
2906 for (int i = 0; i < laneCount; ++i) { | |
2907 result[i] = src1.Uint(vform, i); | |
2908 result[laneCount + i] = src2.Uint(vform, i); | |
2909 } | |
2910 | |
2911 dst.ClearForWrite(vform); | |
2912 for (int i = 0; i < laneCount; ++i) { | |
2913 dst.SetUint(vform, i, result[2 * i]); | |
2914 } | |
2915 return dst; | |
2916 } | |
2917 | |
2918 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, | |
2919 const LogicVRegister& src1, | |
2920 const LogicVRegister& src2) { | |
2921 uint64_t result[32]; | |
2922 int laneCount = LaneCountFromFormat(vform); | |
2923 for (int i = 0; i < laneCount; ++i) { | |
2924 result[i] = src1.Uint(vform, i); | |
2925 result[laneCount + i] = src2.Uint(vform, i); | |
2926 } | |
2927 | |
2928 dst.ClearForWrite(vform); | |
2929 for (int i = 0; i < laneCount; ++i) { | |
2930 dst.SetUint(vform, i, result[(2 * i) + 1]); | |
2931 } | |
2932 return dst; | |
2933 } | |
2934 | |
2935 template <typename T> | |
2936 T Simulator::FPAdd(T op1, T op2) { | |
2937 T result = FPProcessNaNs(op1, op2); | |
2938 if (std::isnan(result)) return result; | |
2939 | |
2940 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { | |
2941 // inf + -inf returns the default NaN. | |
2942 FPProcessException(); | |
2943 return FPDefaultNaN<T>(); | |
2944 } else { | |
2945 // Other cases should be handled by standard arithmetic. | |
2946 return op1 + op2; | |
2947 } | |
2948 } | |
2949 | |
2950 template <typename T> | |
2951 T Simulator::FPSub(T op1, T op2) { | |
2952 // NaNs should be handled elsewhere. | |
2953 DCHECK(!std::isnan(op1) && !std::isnan(op2)); | |
2954 | |
2955 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { | |
2956 // inf - inf returns the default NaN. | |
2957 FPProcessException(); | |
2958 return FPDefaultNaN<T>(); | |
2959 } else { | |
2960 // Other cases should be handled by standard arithmetic. | |
2961 return op1 - op2; | |
2962 } | |
2963 } | |
2964 | |
2965 template <typename T> | |
2966 T Simulator::FPMul(T op1, T op2) { | |
2967 // NaNs should be handled elsewhere. | |
2968 DCHECK(!std::isnan(op1) && !std::isnan(op2)); | |
2969 | |
2970 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { | |
2971 // inf * 0.0 returns the default NaN. | |
2972 FPProcessException(); | |
2973 return FPDefaultNaN<T>(); | |
2974 } else { | |
2975 // Other cases should be handled by standard arithmetic. | |
2976 return op1 * op2; | |
2977 } | |
2978 } | |
2979 | |
2980 template <typename T> | |
2981 T Simulator::FPMulx(T op1, T op2) { | |
2982 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { | |
2983 // inf * 0.0 returns +/-2.0. | |
2984 T two = 2.0; | |
2985 return copysign(1.0, op1) * copysign(1.0, op2) * two; | |
2986 } | |
2987 return FPMul(op1, op2); | |
2988 } | |
2989 | |
2990 template <typename T> | |
2991 T Simulator::FPMulAdd(T a, T op1, T op2) { | |
2992 T result = FPProcessNaNs3(a, op1, op2); | |
2993 | |
2994 T sign_a = copysign(1.0, a); | |
2995 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); | |
2996 bool isinf_prod = std::isinf(op1) || std::isinf(op2); | |
2997 bool operation_generates_nan = | |
2998 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 | |
2999 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf | |
3000 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf | |
3001 | |
3002 if (std::isnan(result)) { | |
3003 // Generated NaNs override quiet NaNs propagated from a. | |
3004 if (operation_generates_nan && IsQuietNaN(a)) { | |
3005 FPProcessException(); | |
3006 return FPDefaultNaN<T>(); | |
3007 } else { | |
3008 return result; | |
3009 } | |
3010 } | |
3011 | |
3012 // If the operation would produce a NaN, return the default NaN. | |
3013 if (operation_generates_nan) { | |
3014 FPProcessException(); | |
3015 return FPDefaultNaN<T>(); | |
3016 } | |
3017 | |
3018 // Work around broken fma implementations for exact zero results: The sign of | |
3019 // exact 0.0 results is positive unless both a and op1 * op2 are negative. | |
3020 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { | |
3021 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; | |
3022 } | |
3023 | |
3024 result = FusedMultiplyAdd(op1, op2, a); | |
3025 DCHECK(!std::isnan(result)); | |
3026 | |
3027 // Work around broken fma implementations for rounded zero results: If a is | |
3028 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. | |
3029 if ((a == 0.0) && (result == 0.0)) { | |
3030 return copysign(0.0, sign_prod); | |
3031 } | |
3032 | |
3033 return result; | |
3034 } | |
3035 | |
3036 template <typename T> | |
3037 T Simulator::FPDiv(T op1, T op2) { | |
3038 // NaNs should be handled elsewhere. | |
3039 DCHECK(!std::isnan(op1) && !std::isnan(op2)); | |
3040 | |
3041 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { | |
3042 // inf / inf and 0.0 / 0.0 return the default NaN. | |
3043 FPProcessException(); | |
3044 return FPDefaultNaN<T>(); | |
3045 } else { | |
3046 if (op2 == 0.0) { | |
3047 FPProcessException(); | |
3048 if (!std::isnan(op1)) { | |
3049 double op1_sign = copysign(1.0, op1); | |
3050 double op2_sign = copysign(1.0, op2); | |
3051 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity); | |
3052 } | |
3053 } | |
3054 | |
3055 // Other cases should be handled by standard arithmetic. | |
3056 return op1 / op2; | |
3057 } | |
3058 } | |
3059 | |
3060 template <typename T> | |
3061 T Simulator::FPSqrt(T op) { | |
3062 if (std::isnan(op)) { | |
3063 return FPProcessNaN(op); | |
3064 } else if (op < 0.0) { | |
3065 FPProcessException(); | |
3066 return FPDefaultNaN<T>(); | |
3067 } else { | |
3068 return sqrt(op); | |
3069 } | |
3070 } | |
3071 | |
3072 template <typename T> | |
3073 T Simulator::FPMax(T a, T b) { | |
3074 T result = FPProcessNaNs(a, b); | |
3075 if (std::isnan(result)) return result; | |
3076 | |
3077 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { | |
3078 // a and b are zero, and the sign differs: return +0.0. | |
3079 return 0.0; | |
3080 } else { | |
3081 return (a > b) ? a : b; | |
3082 } | |
3083 } | |
3084 | |
3085 template <typename T> | |
3086 T Simulator::FPMaxNM(T a, T b) { | |
3087 if (IsQuietNaN(a) && !IsQuietNaN(b)) { | |
3088 a = kFP64NegativeInfinity; | |
3089 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { | |
3090 b = kFP64NegativeInfinity; | |
3091 } | |
3092 | |
3093 T result = FPProcessNaNs(a, b); | |
3094 return std::isnan(result) ? result : FPMax(a, b); | |
3095 } | |
3096 | |
3097 template <typename T> | |
3098 T Simulator::FPMin(T a, T b) { | |
3099 T result = FPProcessNaNs(a, b); | |
3100 if (std::isnan(result)) return result; | |
3101 | |
3102 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { | |
3103 // a and b are zero, and the sign differs: return -0.0. | |
3104 return -0.0; | |
3105 } else { | |
3106 return (a < b) ? a : b; | |
3107 } | |
3108 } | |
3109 | |
3110 template <typename T> | |
3111 T Simulator::FPMinNM(T a, T b) { | |
3112 if (IsQuietNaN(a) && !IsQuietNaN(b)) { | |
3113 a = kFP64PositiveInfinity; | |
3114 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { | |
3115 b = kFP64PositiveInfinity; | |
3116 } | |
3117 | |
3118 T result = FPProcessNaNs(a, b); | |
3119 return std::isnan(result) ? result : FPMin(a, b); | |
3120 } | |
3121 | |
3122 template <typename T> | |
3123 T Simulator::FPRecipStepFused(T op1, T op2) { | |
3124 const T two = 2.0; | |
3125 if ((std::isinf(op1) && (op2 == 0.0)) || | |
3126 ((op1 == 0.0) && (std::isinf(op2)))) { | |
3127 return two; | |
3128 } else if (std::isinf(op1) || std::isinf(op2)) { | |
3129 // Return +inf if signs match, otherwise -inf. | |
3130 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity | |
3131 : kFP64NegativeInfinity; | |
3132 } else { | |
3133 return FusedMultiplyAdd(op1, op2, two); | |
3134 } | |
3135 } | |
3136 | |
3137 template <typename T> | |
3138 T Simulator::FPRSqrtStepFused(T op1, T op2) { | |
3139 const T one_point_five = 1.5; | |
3140 const T two = 2.0; | |
3141 | |
3142 if ((std::isinf(op1) && (op2 == 0.0)) || | |
3143 ((op1 == 0.0) && (std::isinf(op2)))) { | |
3144 return one_point_five; | |
3145 } else if (std::isinf(op1) || std::isinf(op2)) { | |
3146 // Return +inf if signs match, otherwise -inf. | |
3147 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity | |
3148 : kFP64NegativeInfinity; | |
3149 } else { | |
3150 // The multiply-add-halve operation must be fully fused, so avoid interim | |
3151 // rounding by checking which operand can be losslessly divided by two | |
3152 // before doing the multiply-add. | |
3153 if (std::isnormal(op1 / two)) { | |
3154 return FusedMultiplyAdd(op1 / two, op2, one_point_five); | |
3155 } else if (std::isnormal(op2 / two)) { | |
3156 return FusedMultiplyAdd(op1, op2 / two, one_point_five); | |
3157 } else { | |
3158 // Neither operand is normal after halving: the result is dominated by | |
3159 // the addition term, so just return that. | |
3160 return one_point_five; | |
3161 } | |
3162 } | |
3163 } | |
3164 | |
3165 double Simulator::FPRoundInt(double value, FPRounding round_mode) { | |
3166 if ((value == 0.0) || (value == kFP64PositiveInfinity) || | |
3167 (value == kFP64NegativeInfinity)) { | |
3168 return value; | |
3169 } else if (std::isnan(value)) { | |
3170 return FPProcessNaN(value); | |
3171 } | |
3172 | |
3173 double int_result = std::floor(value); | |
3174 double error = value - int_result; | |
3175 switch (round_mode) { | |
3176 case FPTieAway: { | |
3177 // Take care of correctly handling the range ]-0.5, -0.0], which must | |
3178 // yield -0.0. | |
3179 if ((-0.5 < value) && (value < 0.0)) { | |
3180 int_result = -0.0; | |
3181 | |
3182 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { | |
3183 // If the error is greater than 0.5, or is equal to 0.5 and the integer | |
3184 // result is positive, round up. | |
3185 int_result++; | |
3186 } | |
3187 break; | |
3188 } | |
3189 case FPTieEven: { | |
3190 // Take care of correctly handling the range [-0.5, -0.0], which must | |
3191 // yield -0.0. | |
3192 if ((-0.5 <= value) && (value < 0.0)) { | |
3193 int_result = -0.0; | |
3194 | |
3195 // If the error is greater than 0.5, or is equal to 0.5 and the integer | |
3196 // result is odd, round up. | |
3197 } else if ((error > 0.5) || | |
3198 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { | |
3199 int_result++; | |
3200 } | |
3201 break; | |
3202 } | |
3203 case FPZero: { | |
3204 // If value>0 then we take floor(value) | |
3205 // otherwise, ceil(value). | |
3206 if (value < 0) { | |
3207 int_result = ceil(value); | |
3208 } | |
3209 break; | |
3210 } | |
3211 case FPNegativeInfinity: { | |
3212 // We always use floor(value). | |
3213 break; | |
3214 } | |
3215 case FPPositiveInfinity: { | |
3216 // Take care of correctly handling the range ]-1.0, -0.0], which must | |
3217 // yield -0.0. | |
3218 if ((-1.0 < value) && (value < 0.0)) { | |
3219 int_result = -0.0; | |
3220 | |
3221 // If the error is non-zero, round up. | |
3222 } else if (error > 0.0) { | |
3223 int_result++; | |
3224 } | |
3225 break; | |
3226 } | |
3227 default: | |
3228 UNIMPLEMENTED(); | |
3229 } | |
3230 return int_result; | |
3231 } | |
3232 | |
3233 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { | |
3234 value = FPRoundInt(value, rmode); | |
3235 if (value >= kWMaxInt) { | |
3236 return kWMaxInt; | |
3237 } else if (value < kWMinInt) { | |
3238 return kWMinInt; | |
3239 } | |
3240 return std::isnan(value) ? 0 : static_cast<int32_t>(value); | |
3241 } | |
3242 | |
3243 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { | |
3244 value = FPRoundInt(value, rmode); | |
3245 if (value >= kXMaxInt) { | |
3246 return kXMaxInt; | |
3247 } else if (value < kXMinInt) { | |
3248 return kXMinInt; | |
3249 } | |
3250 return std::isnan(value) ? 0 : static_cast<int64_t>(value); | |
3251 } | |
3252 | |
3253 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { | |
3254 value = FPRoundInt(value, rmode); | |
3255 if (value >= kWMaxUInt) { | |
3256 return kWMaxUInt; | |
3257 } else if (value < 0.0) { | |
3258 return 0; | |
3259 } | |
3260 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); | |
3261 } | |
3262 | |
3263 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { | |
3264 value = FPRoundInt(value, rmode); | |
3265 if (value >= kXMaxUInt) { | |
3266 return kXMaxUInt; | |
3267 } else if (value < 0.0) { | |
3268 return 0; | |
3269 } | |
3270 return std::isnan(value) ? 0 : static_cast<uint64_t>(value); | |
3271 } | |
3272 | |
3273 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ | |
3274 template <typename T> \ | |
3275 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \ | |
3276 const LogicVRegister& src1, \ | |
3277 const LogicVRegister& src2) { \ | |
3278 dst.ClearForWrite(vform); \ | |
3279 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ | |
3280 T op1 = src1.Float<T>(i); \ | |
3281 T op2 = src2.Float<T>(i); \ | |
3282 T result; \ | |
3283 if (PROCNAN) { \ | |
3284 result = FPProcessNaNs(op1, op2); \ | |
3285 if (!std::isnan(result)) { \ | |
3286 result = OP(op1, op2); \ | |
3287 } \ | |
3288 } else { \ | |
3289 result = OP(op1, op2); \ | |
3290 } \ | |
3291 dst.SetFloat(i, result); \ | |
3292 } \ | |
3293 return dst; \ | |
3294 } \ | |
3295 \ | |
3296 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \ | |
3297 const LogicVRegister& src1, \ | |
3298 const LogicVRegister& src2) { \ | |
3299 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \ | |
3300 FN<float>(vform, dst, src1, src2); \ | |
3301 } else { \ | |
3302 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \ | |
3303 FN<double>(vform, dst, src1, src2); \ | |
3304 } \ | |
3305 return dst; \ | |
3306 } | |
3307 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) | |
3308 #undef DEFINE_NEON_FP_VECTOR_OP | |
3309 | |
3310 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst, | |
3311 const LogicVRegister& src1, | |
3312 const LogicVRegister& src2) { | |
3313 SimVRegister temp; | |
3314 LogicVRegister product = fmul(vform, temp, src1, src2); | |
3315 return fneg(vform, dst, product); | |
3316 } | |
3317 | |
3318 template <typename T> | |
3319 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, | |
3320 const LogicVRegister& src1, | |
3321 const LogicVRegister& src2) { | |
3322 dst.ClearForWrite(vform); | |
3323 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3324 T op1 = -src1.Float<T>(i); | |
3325 T op2 = src2.Float<T>(i); | |
3326 T result = FPProcessNaNs(op1, op2); | |
3327 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2)); | |
3328 } | |
3329 return dst; | |
3330 } | |
3331 | |
3332 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, | |
3333 const LogicVRegister& src1, | |
3334 const LogicVRegister& src2) { | |
3335 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3336 frecps<float>(vform, dst, src1, src2); | |
3337 } else { | |
3338 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3339 frecps<double>(vform, dst, src1, src2); | |
3340 } | |
3341 return dst; | |
3342 } | |
3343 | |
3344 template <typename T> | |
3345 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, | |
3346 const LogicVRegister& src1, | |
3347 const LogicVRegister& src2) { | |
3348 dst.ClearForWrite(vform); | |
3349 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3350 T op1 = -src1.Float<T>(i); | |
3351 T op2 = src2.Float<T>(i); | |
3352 T result = FPProcessNaNs(op1, op2); | |
3353 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); | |
3354 } | |
3355 return dst; | |
3356 } | |
3357 | |
3358 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, | |
3359 const LogicVRegister& src1, | |
3360 const LogicVRegister& src2) { | |
3361 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3362 frsqrts<float>(vform, dst, src1, src2); | |
3363 } else { | |
3364 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3365 frsqrts<double>(vform, dst, src1, src2); | |
3366 } | |
3367 return dst; | |
3368 } | |
3369 | |
3370 template <typename T> | |
3371 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, | |
3372 const LogicVRegister& src1, | |
3373 const LogicVRegister& src2, Condition cond) { | |
3374 dst.ClearForWrite(vform); | |
3375 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3376 bool result = false; | |
3377 T op1 = src1.Float<T>(i); | |
3378 T op2 = src2.Float<T>(i); | |
3379 T nan_result = FPProcessNaNs(op1, op2); | |
3380 if (!std::isnan(nan_result)) { | |
3381 switch (cond) { | |
3382 case eq: | |
3383 result = (op1 == op2); | |
3384 break; | |
3385 case ge: | |
3386 result = (op1 >= op2); | |
3387 break; | |
3388 case gt: | |
3389 result = (op1 > op2); | |
3390 break; | |
3391 case le: | |
3392 result = (op1 <= op2); | |
3393 break; | |
3394 case lt: | |
3395 result = (op1 < op2); | |
3396 break; | |
3397 default: | |
3398 UNREACHABLE(); | |
3399 break; | |
3400 } | |
3401 } | |
3402 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); | |
3403 } | |
3404 return dst; | |
3405 } | |
3406 | |
3407 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, | |
3408 const LogicVRegister& src1, | |
3409 const LogicVRegister& src2, Condition cond) { | |
3410 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3411 fcmp<float>(vform, dst, src1, src2, cond); | |
3412 } else { | |
3413 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3414 fcmp<double>(vform, dst, src1, src2, cond); | |
3415 } | |
3416 return dst; | |
3417 } | |
3418 | |
3419 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst, | |
3420 const LogicVRegister& src, Condition cond) { | |
3421 SimVRegister temp; | |
3422 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3423 LogicVRegister zero_reg = | |
3424 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f)); | |
3425 fcmp<float>(vform, dst, src, zero_reg, cond); | |
3426 } else { | |
3427 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3428 LogicVRegister zero_reg = | |
3429 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0)); | |
3430 fcmp<double>(vform, dst, src, zero_reg, cond); | |
3431 } | |
3432 return dst; | |
3433 } | |
3434 | |
3435 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst, | |
3436 const LogicVRegister& src1, | |
3437 const LogicVRegister& src2, Condition cond) { | |
3438 SimVRegister temp1, temp2; | |
3439 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3440 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); | |
3441 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); | |
3442 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); | |
3443 } else { | |
3444 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3445 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); | |
3446 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); | |
3447 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); | |
3448 } | |
3449 return dst; | |
3450 } | |
3451 | |
3452 template <typename T> | |
3453 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, | |
3454 const LogicVRegister& src1, | |
3455 const LogicVRegister& src2) { | |
3456 dst.ClearForWrite(vform); | |
3457 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3458 T op1 = src1.Float<T>(i); | |
3459 T op2 = src2.Float<T>(i); | |
3460 T acc = dst.Float<T>(i); | |
3461 T result = FPMulAdd(acc, op1, op2); | |
3462 dst.SetFloat(i, result); | |
3463 } | |
3464 return dst; | |
3465 } | |
3466 | |
3467 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, | |
3468 const LogicVRegister& src1, | |
3469 const LogicVRegister& src2) { | |
3470 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3471 fmla<float>(vform, dst, src1, src2); | |
3472 } else { | |
3473 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3474 fmla<double>(vform, dst, src1, src2); | |
3475 } | |
3476 return dst; | |
3477 } | |
3478 | |
3479 template <typename T> | |
3480 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, | |
3481 const LogicVRegister& src1, | |
3482 const LogicVRegister& src2) { | |
3483 dst.ClearForWrite(vform); | |
3484 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3485 T op1 = -src1.Float<T>(i); | |
3486 T op2 = src2.Float<T>(i); | |
3487 T acc = dst.Float<T>(i); | |
3488 T result = FPMulAdd(acc, op1, op2); | |
3489 dst.SetFloat(i, result); | |
3490 } | |
3491 return dst; | |
3492 } | |
3493 | |
3494 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, | |
3495 const LogicVRegister& src1, | |
3496 const LogicVRegister& src2) { | |
3497 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3498 fmls<float>(vform, dst, src1, src2); | |
3499 } else { | |
3500 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3501 fmls<double>(vform, dst, src1, src2); | |
3502 } | |
3503 return dst; | |
3504 } | |
3505 | |
3506 template <typename T> | |
3507 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, | |
3508 const LogicVRegister& src) { | |
3509 dst.ClearForWrite(vform); | |
3510 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3511 T op = src.Float<T>(i); | |
3512 op = -op; | |
3513 dst.SetFloat(i, op); | |
3514 } | |
3515 return dst; | |
3516 } | |
3517 | |
3518 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, | |
3519 const LogicVRegister& src) { | |
3520 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3521 fneg<float>(vform, dst, src); | |
3522 } else { | |
3523 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3524 fneg<double>(vform, dst, src); | |
3525 } | |
3526 return dst; | |
3527 } | |
3528 | |
3529 template <typename T> | |
3530 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, | |
3531 const LogicVRegister& src) { | |
3532 dst.ClearForWrite(vform); | |
3533 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3534 T op = src.Float<T>(i); | |
3535 if (copysign(1.0, op) < 0.0) { | |
3536 op = -op; | |
3537 } | |
3538 dst.SetFloat(i, op); | |
3539 } | |
3540 return dst; | |
3541 } | |
3542 | |
3543 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, | |
3544 const LogicVRegister& src) { | |
3545 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3546 fabs_<float>(vform, dst, src); | |
3547 } else { | |
3548 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3549 fabs_<double>(vform, dst, src); | |
3550 } | |
3551 return dst; | |
3552 } | |
3553 | |
3554 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst, | |
3555 const LogicVRegister& src1, | |
3556 const LogicVRegister& src2) { | |
3557 SimVRegister temp; | |
3558 fsub(vform, temp, src1, src2); | |
3559 fabs_(vform, dst, temp); | |
3560 return dst; | |
3561 } | |
3562 | |
3563 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst, | |
3564 const LogicVRegister& src) { | |
3565 dst.ClearForWrite(vform); | |
3566 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3567 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3568 float result = FPSqrt(src.Float<float>(i)); | |
3569 dst.SetFloat(i, result); | |
3570 } | |
3571 } else { | |
3572 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3573 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3574 double result = FPSqrt(src.Float<double>(i)); | |
3575 dst.SetFloat(i, result); | |
3576 } | |
3577 } | |
3578 return dst; | |
3579 } | |
3580 | |
3581 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ | |
3582 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \ | |
3583 const LogicVRegister& src1, \ | |
3584 const LogicVRegister& src2) { \ | |
3585 SimVRegister temp1, temp2; \ | |
3586 uzp1(vform, temp1, src1, src2); \ | |
3587 uzp2(vform, temp2, src1, src2); \ | |
3588 FN(vform, dst, temp1, temp2); \ | |
3589 return dst; \ | |
3590 } \ | |
3591 \ | |
3592 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \ | |
3593 const LogicVRegister& src) { \ | |
3594 if (vform == kFormatS) { \ | |
3595 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ | |
3596 dst.SetFloat(0, result); \ | |
3597 } else { \ | |
3598 DCHECK_EQ(vform, kFormatD); \ | |
3599 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ | |
3600 dst.SetFloat(0, result); \ | |
3601 } \ | |
3602 dst.ClearForWrite(vform); \ | |
3603 return dst; \ | |
3604 } | |
3605 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) | |
3606 #undef DEFINE_NEON_FP_PAIR_OP | |
3607 | |
3608 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst, | |
3609 const LogicVRegister& src, FPMinMaxOp Op) { | |
3610 DCHECK_EQ(vform, kFormat4S); | |
3611 USE(vform); | |
3612 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); | |
3613 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); | |
3614 float result = (this->*Op)(result1, result2); | |
3615 dst.ClearForWrite(kFormatS); | |
3616 dst.SetFloat<float>(0, result); | |
3617 return dst; | |
3618 } | |
3619 | |
3620 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst, | |
3621 const LogicVRegister& src) { | |
3622 return FMinMaxV(vform, dst, src, &Simulator::FPMax); | |
3623 } | |
3624 | |
3625 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst, | |
3626 const LogicVRegister& src) { | |
3627 return FMinMaxV(vform, dst, src, &Simulator::FPMin); | |
3628 } | |
3629 | |
3630 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst, | |
3631 const LogicVRegister& src) { | |
3632 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM); | |
3633 } | |
3634 | |
3635 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst, | |
3636 const LogicVRegister& src) { | |
3637 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM); | |
3638 } | |
3639 | |
3640 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst, | |
3641 const LogicVRegister& src1, | |
3642 const LogicVRegister& src2, int index) { | |
3643 dst.ClearForWrite(vform); | |
3644 SimVRegister temp; | |
3645 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3646 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3647 fmul<float>(vform, dst, src1, index_reg); | |
3648 } else { | |
3649 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3650 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3651 fmul<double>(vform, dst, src1, index_reg); | |
3652 } | |
3653 return dst; | |
3654 } | |
3655 | |
3656 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, | |
3657 const LogicVRegister& src1, | |
3658 const LogicVRegister& src2, int index) { | |
3659 dst.ClearForWrite(vform); | |
3660 SimVRegister temp; | |
3661 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3662 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3663 fmla<float>(vform, dst, src1, index_reg); | |
3664 } else { | |
3665 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3666 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3667 fmla<double>(vform, dst, src1, index_reg); | |
3668 } | |
3669 return dst; | |
3670 } | |
3671 | |
3672 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, | |
3673 const LogicVRegister& src1, | |
3674 const LogicVRegister& src2, int index) { | |
3675 dst.ClearForWrite(vform); | |
3676 SimVRegister temp; | |
3677 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3678 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3679 fmls<float>(vform, dst, src1, index_reg); | |
3680 } else { | |
3681 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3682 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3683 fmls<double>(vform, dst, src1, index_reg); | |
3684 } | |
3685 return dst; | |
3686 } | |
3687 | |
3688 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst, | |
3689 const LogicVRegister& src1, | |
3690 const LogicVRegister& src2, int index) { | |
3691 dst.ClearForWrite(vform); | |
3692 SimVRegister temp; | |
3693 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3694 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); | |
3695 fmulx<float>(vform, dst, src1, index_reg); | |
3696 | |
3697 } else { | |
3698 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3699 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); | |
3700 fmulx<double>(vform, dst, src1, index_reg); | |
3701 } | |
3702 return dst; | |
3703 } | |
3704 | |
3705 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst, | |
3706 const LogicVRegister& src, | |
3707 FPRounding rounding_mode, | |
3708 bool inexact_exception) { | |
3709 dst.ClearForWrite(vform); | |
3710 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3711 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3712 float input = src.Float<float>(i); | |
3713 float rounded = FPRoundInt(input, rounding_mode); | |
3714 if (inexact_exception && !std::isnan(input) && (input != rounded)) { | |
3715 FPProcessException(); | |
3716 } | |
3717 dst.SetFloat<float>(i, rounded); | |
3718 } | |
3719 } else { | |
3720 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3721 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3722 double input = src.Float<double>(i); | |
3723 double rounded = FPRoundInt(input, rounding_mode); | |
3724 if (inexact_exception && !std::isnan(input) && (input != rounded)) { | |
3725 FPProcessException(); | |
3726 } | |
3727 dst.SetFloat<double>(i, rounded); | |
3728 } | |
3729 } | |
3730 return dst; | |
3731 } | |
3732 | |
3733 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst, | |
3734 const LogicVRegister& src, | |
3735 FPRounding rounding_mode, int fbits) { | |
3736 dst.ClearForWrite(vform); | |
3737 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3738 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3739 float op = src.Float<float>(i) * std::pow(2.0f, fbits); | |
3740 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); | |
3741 } | |
3742 } else { | |
3743 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3744 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3745 double op = src.Float<double>(i) * std::pow(2.0, fbits); | |
3746 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); | |
3747 } | |
3748 } | |
3749 return dst; | |
3750 } | |
3751 | |
3752 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst, | |
3753 const LogicVRegister& src, | |
3754 FPRounding rounding_mode, int fbits) { | |
3755 dst.ClearForWrite(vform); | |
3756 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3757 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3758 float op = src.Float<float>(i) * std::pow(2.0f, fbits); | |
3759 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); | |
3760 } | |
3761 } else { | |
3762 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3763 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3764 double op = src.Float<double>(i) * std::pow(2.0, fbits); | |
3765 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); | |
3766 } | |
3767 } | |
3768 return dst; | |
3769 } | |
3770 | |
3771 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, | |
3772 const LogicVRegister& src) { | |
3773 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3774 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { | |
3775 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); | |
3776 } | |
3777 } else { | |
3778 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3779 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { | |
3780 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); | |
3781 } | |
3782 } | |
3783 return dst; | |
3784 } | |
3785 | |
3786 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst, | |
3787 const LogicVRegister& src) { | |
3788 int lane_count = LaneCountFromFormat(vform); | |
3789 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3790 for (int i = 0; i < lane_count; i++) { | |
3791 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); | |
3792 } | |
3793 } else { | |
3794 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3795 for (int i = 0; i < lane_count; i++) { | |
3796 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); | |
3797 } | |
3798 } | |
3799 return dst; | |
3800 } | |
3801 | |
3802 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst, | |
3803 const LogicVRegister& src) { | |
3804 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { | |
3805 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3806 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); | |
3807 } | |
3808 } else { | |
3809 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3810 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3811 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); | |
3812 } | |
3813 } | |
3814 return dst; | |
3815 } | |
3816 | |
3817 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst, | |
3818 const LogicVRegister& src) { | |
3819 int lane_count = LaneCountFromFormat(vform) / 2; | |
3820 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { | |
3821 for (int i = lane_count - 1; i >= 0; i--) { | |
3822 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); | |
3823 } | |
3824 } else { | |
3825 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3826 for (int i = lane_count - 1; i >= 0; i--) { | |
3827 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); | |
3828 } | |
3829 } | |
3830 return dst; | |
3831 } | |
3832 | |
3833 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst, | |
3834 const LogicVRegister& src) { | |
3835 dst.ClearForWrite(vform); | |
3836 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3837 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3838 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); | |
3839 } | |
3840 return dst; | |
3841 } | |
3842 | |
3843 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst, | |
3844 const LogicVRegister& src) { | |
3845 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); | |
3846 int lane_count = LaneCountFromFormat(vform) / 2; | |
3847 for (int i = lane_count - 1; i >= 0; i--) { | |
3848 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); | |
3849 } | |
3850 return dst; | |
3851 } | |
3852 | |
3853 // Based on reference C function recip_sqrt_estimate from ARM ARM. | |
3854 double Simulator::recip_sqrt_estimate(double a) { | |
3855 int q0, q1, s; | |
3856 double r; | |
3857 if (a < 0.5) { | |
3858 q0 = static_cast<int>(a * 512.0); | |
3859 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); | |
3860 } else { | |
3861 q1 = static_cast<int>(a * 256.0); | |
3862 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); | |
3863 } | |
3864 s = static_cast<int>(256.0 * r + 0.5); | |
3865 return static_cast<double>(s) / 256.0; | |
3866 } | |
3867 | |
3868 namespace { | |
3869 | |
3870 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { | |
3871 return unsigned_bitextract_64(start_bit, end_bit, val); | |
3872 } | |
3873 | |
3874 } // anonymous namespace | |
3875 | |
3876 template <typename T> | |
3877 T Simulator::FPRecipSqrtEstimate(T op) { | |
3878 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, | |
3879 "T must be a float or double"); | |
3880 | |
3881 if (std::isnan(op)) { | |
3882 return FPProcessNaN(op); | |
3883 } else if (op == 0.0) { | |
3884 if (copysign(1.0, op) < 0.0) { | |
3885 return kFP64NegativeInfinity; | |
3886 } else { | |
3887 return kFP64PositiveInfinity; | |
3888 } | |
3889 } else if (copysign(1.0, op) < 0.0) { | |
3890 FPProcessException(); | |
3891 return FPDefaultNaN<T>(); | |
3892 } else if (std::isinf(op)) { | |
3893 return 0.0; | |
3894 } else { | |
3895 uint64_t fraction; | |
3896 int32_t exp, result_exp; | |
3897 | |
3898 if (sizeof(T) == sizeof(float)) { | |
3899 exp = static_cast<int32_t>(float_exp(op)); | |
3900 fraction = float_mantissa(op); | |
3901 fraction <<= 29; | |
3902 } else { | |
3903 exp = static_cast<int32_t>(double_exp(op)); | |
3904 fraction = double_mantissa(op); | |
3905 } | |
3906 | |
3907 if (exp == 0) { | |
3908 while (Bits(fraction, 51, 51) == 0) { | |
3909 fraction = Bits(fraction, 50, 0) << 1; | |
3910 exp -= 1; | |
3911 } | |
3912 fraction = Bits(fraction, 50, 0) << 1; | |
3913 } | |
3914 | |
3915 double scaled; | |
3916 if (Bits(exp, 0, 0) == 0) { | |
3917 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); | |
3918 } else { | |
3919 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44); | |
3920 } | |
3921 | |
3922 if (sizeof(T) == sizeof(float)) { | |
3923 result_exp = (380 - exp) / 2; | |
3924 } else { | |
3925 result_exp = (3068 - exp) / 2; | |
3926 } | |
3927 | |
3928 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled)); | |
3929 | |
3930 if (sizeof(T) == sizeof(float)) { | |
3931 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); | |
3932 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); | |
3933 return float_pack(0, exp_bits, est_bits); | |
3934 } else { | |
3935 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); | |
3936 } | |
3937 } | |
3938 } | |
3939 | |
3940 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst, | |
3941 const LogicVRegister& src) { | |
3942 dst.ClearForWrite(vform); | |
3943 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
3944 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3945 float input = src.Float<float>(i); | |
3946 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); | |
3947 } | |
3948 } else { | |
3949 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
3950 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
3951 double input = src.Float<double>(i); | |
3952 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); | |
3953 } | |
3954 } | |
3955 return dst; | |
3956 } | |
3957 | |
3958 template <typename T> | |
3959 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { | |
3960 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, | |
3961 "T must be a float or double"); | |
3962 uint32_t sign; | |
3963 | |
3964 if (sizeof(T) == sizeof(float)) { | |
3965 sign = float_sign(op); | |
3966 } else { | |
3967 sign = double_sign(op); | |
3968 } | |
3969 | |
3970 if (std::isnan(op)) { | |
3971 return FPProcessNaN(op); | |
3972 } else if (std::isinf(op)) { | |
3973 return (sign == 1) ? -0.0 : 0.0; | |
3974 } else if (op == 0.0) { | |
3975 FPProcessException(); // FPExc_DivideByZero exception. | |
3976 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; | |
3977 } else if (((sizeof(T) == sizeof(float)) && | |
3978 (std::fabs(op) < std::pow(2.0, -128.0))) || | |
3979 ((sizeof(T) == sizeof(double)) && | |
3980 (std::fabs(op) < std::pow(2.0, -1024.0)))) { | |
3981 bool overflow_to_inf = false; | |
3982 switch (rounding) { | |
3983 case FPTieEven: | |
3984 overflow_to_inf = true; | |
3985 break; | |
3986 case FPPositiveInfinity: | |
3987 overflow_to_inf = (sign == 0); | |
3988 break; | |
3989 case FPNegativeInfinity: | |
3990 overflow_to_inf = (sign == 1); | |
3991 break; | |
3992 case FPZero: | |
3993 overflow_to_inf = false; | |
3994 break; | |
3995 default: | |
3996 break; | |
3997 } | |
3998 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. | |
3999 if (overflow_to_inf) { | |
4000 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; | |
4001 } else { | |
4002 // Return FPMaxNormal(sign). | |
4003 if (sizeof(T) == sizeof(float)) { | |
4004 return float_pack(sign, 0xfe, 0x07fffff); | |
4005 } else { | |
4006 return double_pack(sign, 0x7fe, 0x0fffffffffffffl); | |
4007 } | |
4008 } | |
4009 } else { | |
4010 uint64_t fraction; | |
4011 int32_t exp, result_exp; | |
4012 uint32_t sign; | |
4013 | |
4014 if (sizeof(T) == sizeof(float)) { | |
4015 sign = float_sign(op); | |
4016 exp = static_cast<int32_t>(float_exp(op)); | |
4017 fraction = float_mantissa(op); | |
4018 fraction <<= 29; | |
4019 } else { | |
4020 sign = double_sign(op); | |
4021 exp = static_cast<int32_t>(double_exp(op)); | |
4022 fraction = double_mantissa(op); | |
4023 } | |
4024 | |
4025 if (exp == 0) { | |
4026 if (Bits(fraction, 51, 51) == 0) { | |
4027 exp -= 1; | |
4028 fraction = Bits(fraction, 49, 0) << 2; | |
4029 } else { | |
4030 fraction = Bits(fraction, 50, 0) << 1; | |
4031 } | |
4032 } | |
4033 | |
4034 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); | |
4035 | |
4036 if (sizeof(T) == sizeof(float)) { | |
4037 result_exp = 253 - exp; | |
4038 } else { | |
4039 result_exp = 2045 - exp; | |
4040 } | |
4041 | |
4042 double estimate = recip_estimate(scaled); | |
4043 | |
4044 fraction = double_mantissa(estimate); | |
4045 if (result_exp == 0) { | |
4046 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); | |
4047 } else if (result_exp == -1) { | |
4048 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); | |
4049 result_exp = 0; | |
4050 } | |
4051 if (sizeof(T) == sizeof(float)) { | |
4052 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); | |
4053 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); | |
4054 return float_pack(sign, exp_bits, frac_bits); | |
4055 } else { | |
4056 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); | |
4057 } | |
4058 } | |
4059 } | |
4060 | |
4061 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst, | |
4062 const LogicVRegister& src, FPRounding round) { | |
4063 dst.ClearForWrite(vform); | |
4064 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4065 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4066 float input = src.Float<float>(i); | |
4067 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); | |
4068 } | |
4069 } else { | |
4070 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4071 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4072 double input = src.Float<double>(i); | |
4073 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); | |
4074 } | |
4075 } | |
4076 return dst; | |
4077 } | |
4078 | |
4079 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst, | |
4080 const LogicVRegister& src) { | |
4081 dst.ClearForWrite(vform); | |
4082 uint64_t operand; | |
4083 uint32_t result; | |
4084 double dp_operand, dp_result; | |
4085 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4086 operand = src.Uint(vform, i); | |
4087 if (operand <= 0x3FFFFFFF) { | |
4088 result = 0xFFFFFFFF; | |
4089 } else { | |
4090 dp_operand = operand * std::pow(2.0, -32); | |
4091 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); | |
4092 result = static_cast<uint32_t>(dp_result); | |
4093 } | |
4094 dst.SetUint(vform, i, result); | |
4095 } | |
4096 return dst; | |
4097 } | |
4098 | |
4099 // Based on reference C function recip_estimate from ARM ARM. | |
4100 double Simulator::recip_estimate(double a) { | |
4101 int q, s; | |
4102 double r; | |
4103 q = static_cast<int>(a * 512.0); | |
4104 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); | |
4105 s = static_cast<int>(256.0 * r + 0.5); | |
4106 return static_cast<double>(s) / 256.0; | |
4107 } | |
4108 | |
4109 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst, | |
4110 const LogicVRegister& src) { | |
4111 dst.ClearForWrite(vform); | |
4112 uint64_t operand; | |
4113 uint32_t result; | |
4114 double dp_operand, dp_result; | |
4115 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4116 operand = src.Uint(vform, i); | |
4117 if (operand <= 0x7FFFFFFF) { | |
4118 result = 0xFFFFFFFF; | |
4119 } else { | |
4120 dp_operand = operand * std::pow(2.0, -32); | |
4121 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); | |
4122 result = static_cast<uint32_t>(dp_result); | |
4123 } | |
4124 dst.SetUint(vform, i, result); | |
4125 } | |
4126 return dst; | |
4127 } | |
4128 | |
4129 template <typename T> | |
4130 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, | |
4131 const LogicVRegister& src) { | |
4132 dst.ClearForWrite(vform); | |
4133 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4134 T op = src.Float<T>(i); | |
4135 T result; | |
4136 if (std::isnan(op)) { | |
4137 result = FPProcessNaN(op); | |
4138 } else { | |
4139 int exp; | |
4140 uint32_t sign; | |
4141 if (sizeof(T) == sizeof(float)) { | |
4142 sign = float_sign(op); | |
4143 exp = static_cast<int>(float_exp(op)); | |
4144 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); | |
4145 result = float_pack(sign, exp, 0); | |
4146 } else { | |
4147 sign = double_sign(op); | |
4148 exp = static_cast<int>(double_exp(op)); | |
4149 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); | |
4150 result = double_pack(sign, exp, 0); | |
4151 } | |
4152 } | |
4153 dst.SetFloat(i, result); | |
4154 } | |
4155 return dst; | |
4156 } | |
4157 | |
4158 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, | |
4159 const LogicVRegister& src) { | |
4160 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4161 frecpx<float>(vform, dst, src); | |
4162 } else { | |
4163 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4164 frecpx<double>(vform, dst, src); | |
4165 } | |
4166 return dst; | |
4167 } | |
4168 | |
4169 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, | |
4170 const LogicVRegister& src, int fbits, | |
4171 FPRounding round) { | |
4172 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4173 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4174 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); | |
4175 dst.SetFloat<float>(i, result); | |
4176 } else { | |
4177 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4178 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); | |
4179 dst.SetFloat<double>(i, result); | |
4180 } | |
4181 } | |
4182 return dst; | |
4183 } | |
4184 | |
4185 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, | |
4186 const LogicVRegister& src, int fbits, | |
4187 FPRounding round) { | |
4188 for (int i = 0; i < LaneCountFromFormat(vform); i++) { | |
4189 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { | |
4190 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); | |
4191 dst.SetFloat<float>(i, result); | |
4192 } else { | |
4193 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); | |
4194 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); | |
4195 dst.SetFloat<double>(i, result); | |
4196 } | |
4197 } | |
4198 return dst; | |
4199 } | |
4200 | |
4201 #endif // USE_SIMULATOR | |
4202 | |
4203 } // namespace internal | |
4204 } // namespace v8 | |
4205 | |
4206 #endif // V8_TARGET_ARCH_ARM64 | |
OLD | NEW |