OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #if V8_TARGET_ARCH_ARM64 |
| 6 |
| 7 #include <cmath> |
| 8 #include "src/arm64/simulator-arm64.h" |
| 9 |
| 10 namespace v8 { |
| 11 namespace internal { |
| 12 |
| 13 #if defined(USE_SIMULATOR) |
| 14 |
| 15 namespace { |
| 16 |
| 17 // See FPRound for a description of this function. |
| 18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa, |
| 19 FPRounding round_mode) { |
| 20 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>( |
| 21 sign, exponent, mantissa, round_mode); |
| 22 return bit_cast<double>(bits); |
| 23 } |
| 24 |
| 25 // See FPRound for a description of this function. |
| 26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa, |
| 27 FPRounding round_mode) { |
| 28 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>( |
| 29 sign, exponent, mantissa, round_mode); |
| 30 return bit_cast<float>(bits); |
| 31 } |
| 32 |
| 33 // See FPRound for a description of this function. |
| 34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent, |
| 35 uint64_t mantissa, FPRounding round_mode) { |
| 36 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>( |
| 37 sign, exponent, mantissa, round_mode); |
| 38 } |
| 39 |
| 40 } // namespace |
| 41 |
| 42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { |
| 43 if (src >= 0) { |
| 44 return UFixedToDouble(src, fbits, round); |
| 45 } else if (src == INT64_MIN) { |
| 46 return -UFixedToDouble(src, fbits, round); |
| 47 } else { |
| 48 return -UFixedToDouble(-src, fbits, round); |
| 49 } |
| 50 } |
| 51 |
| 52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { |
| 53 // An input of 0 is a special case because the result is effectively |
| 54 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. |
| 55 if (src == 0) { |
| 56 return 0.0; |
| 57 } |
| 58 |
| 59 // Calculate the exponent. The highest significant bit will have the value |
| 60 // 2^exponent. |
| 61 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); |
| 62 const int64_t exponent = highest_significant_bit - fbits; |
| 63 |
| 64 return FPRoundToDouble(0, exponent, src, round); |
| 65 } |
| 66 |
| 67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { |
| 68 if (src >= 0) { |
| 69 return UFixedToFloat(src, fbits, round); |
| 70 } else if (src == INT64_MIN) { |
| 71 return -UFixedToFloat(src, fbits, round); |
| 72 } else { |
| 73 return -UFixedToFloat(-src, fbits, round); |
| 74 } |
| 75 } |
| 76 |
| 77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { |
| 78 // An input of 0 is a special case because the result is effectively |
| 79 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. |
| 80 if (src == 0) { |
| 81 return 0.0f; |
| 82 } |
| 83 |
| 84 // Calculate the exponent. The highest significant bit will have the value |
| 85 // 2^exponent. |
| 86 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64); |
| 87 const int32_t exponent = highest_significant_bit - fbits; |
| 88 |
| 89 return FPRoundToFloat(0, exponent, src, round); |
| 90 } |
| 91 |
| 92 double Simulator::FPToDouble(float value) { |
| 93 switch (std::fpclassify(value)) { |
| 94 case FP_NAN: { |
| 95 if (IsSignallingNaN(value)) { |
| 96 FPProcessException(); |
| 97 } |
| 98 if (DN()) return kFP64DefaultNaN; |
| 99 |
| 100 // Convert NaNs as the processor would: |
| 101 // - The sign is propagated. |
| 102 // - The mantissa is transferred entirely, except that the top bit is |
| 103 // forced to '1', making the result a quiet NaN. The unused (low-order) |
| 104 // mantissa bits are set to 0. |
| 105 uint32_t raw = bit_cast<uint32_t>(value); |
| 106 |
| 107 uint64_t sign = raw >> 31; |
| 108 uint64_t exponent = (1 << kDoubleExponentBits) - 1; |
| 109 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw); |
| 110 |
| 111 // Unused low-order bits remain zero. |
| 112 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits); |
| 113 |
| 114 // Force a quiet NaN. |
| 115 mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1)); |
| 116 |
| 117 return double_pack(sign, exponent, mantissa); |
| 118 } |
| 119 |
| 120 case FP_ZERO: |
| 121 case FP_NORMAL: |
| 122 case FP_SUBNORMAL: |
| 123 case FP_INFINITE: { |
| 124 // All other inputs are preserved in a standard cast, because every value |
| 125 // representable using an IEEE-754 float is also representable using an |
| 126 // IEEE-754 double. |
| 127 return static_cast<double>(value); |
| 128 } |
| 129 } |
| 130 |
| 131 UNREACHABLE(); |
| 132 return kFP64DefaultNaN; |
| 133 } |
| 134 |
| 135 float Simulator::FPToFloat(float16 value) { |
| 136 uint32_t sign = value >> 15; |
| 137 uint32_t exponent = |
| 138 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1, |
| 139 kFloat16MantissaBits, value); |
| 140 uint32_t mantissa = |
| 141 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value); |
| 142 |
| 143 switch (float16classify(value)) { |
| 144 case FP_ZERO: |
| 145 return (sign == 0) ? 0.0f : -0.0f; |
| 146 |
| 147 case FP_INFINITE: |
| 148 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; |
| 149 |
| 150 case FP_SUBNORMAL: { |
| 151 // Calculate shift required to put mantissa into the most-significant bits |
| 152 // of the destination mantissa. |
| 153 int shift = CountLeadingZeros(mantissa << (32 - 10), 32); |
| 154 |
| 155 // Shift mantissa and discard implicit '1'. |
| 156 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; |
| 157 mantissa &= (1 << kFloatMantissaBits) - 1; |
| 158 |
| 159 // Adjust the exponent for the shift applied, and rebias. |
| 160 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias); |
| 161 break; |
| 162 } |
| 163 |
| 164 case FP_NAN: { |
| 165 if (IsSignallingNaN(value)) { |
| 166 FPProcessException(); |
| 167 } |
| 168 if (DN()) return kFP32DefaultNaN; |
| 169 |
| 170 // Convert NaNs as the processor would: |
| 171 // - The sign is propagated. |
| 172 // - The mantissa is transferred entirely, except that the top bit is |
| 173 // forced to '1', making the result a quiet NaN. The unused (low-order) |
| 174 // mantissa bits are set to 0. |
| 175 exponent = (1 << kFloatExponentBits) - 1; |
| 176 |
| 177 // Increase bits in mantissa, making low-order bits 0. |
| 178 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); |
| 179 mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN. |
| 180 break; |
| 181 } |
| 182 |
| 183 case FP_NORMAL: { |
| 184 // Increase bits in mantissa, making low-order bits 0. |
| 185 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); |
| 186 |
| 187 // Change exponent bias. |
| 188 exponent += (kFloatExponentBias - kFloat16ExponentBias); |
| 189 break; |
| 190 } |
| 191 |
| 192 default: |
| 193 UNREACHABLE(); |
| 194 return kFP32DefaultNaN; |
| 195 } |
| 196 return float_pack(sign, exponent, mantissa); |
| 197 } |
| 198 |
| 199 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { |
| 200 // Only the FPTieEven rounding mode is implemented. |
| 201 DCHECK_EQ(round_mode, FPTieEven); |
| 202 USE(round_mode); |
| 203 |
| 204 int64_t sign = float_sign(value); |
| 205 int64_t exponent = |
| 206 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias; |
| 207 uint32_t mantissa = float_mantissa(value); |
| 208 |
| 209 switch (std::fpclassify(value)) { |
| 210 case FP_NAN: { |
| 211 if (IsSignallingNaN(value)) { |
| 212 FPProcessException(); |
| 213 } |
| 214 if (DN()) return kFP16DefaultNaN; |
| 215 |
| 216 // Convert NaNs as the processor would: |
| 217 // - The sign is propagated. |
| 218 // - The mantissa is transferred as much as possible, except that the top |
| 219 // bit is forced to '1', making the result a quiet NaN. |
| 220 float16 result = |
| 221 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; |
| 222 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); |
| 223 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; |
| 224 return result; |
| 225 } |
| 226 |
| 227 case FP_ZERO: |
| 228 return (sign == 0) ? 0 : 0x8000; |
| 229 |
| 230 case FP_INFINITE: |
| 231 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; |
| 232 |
| 233 case FP_NORMAL: |
| 234 case FP_SUBNORMAL: { |
| 235 // Convert float-to-half as the processor would, assuming that FPCR.FZ |
| 236 // (flush-to-zero) is not set. |
| 237 |
| 238 // Add the implicit '1' bit to the mantissa. |
| 239 mantissa += (1 << kFloatMantissaBits); |
| 240 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); |
| 241 } |
| 242 } |
| 243 |
| 244 UNREACHABLE(); |
| 245 return kFP16DefaultNaN; |
| 246 } |
| 247 |
| 248 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { |
| 249 // Only the FPTieEven rounding mode is implemented. |
| 250 DCHECK_EQ(round_mode, FPTieEven); |
| 251 USE(round_mode); |
| 252 |
| 253 int64_t sign = double_sign(value); |
| 254 int64_t exponent = |
| 255 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; |
| 256 uint64_t mantissa = double_mantissa(value); |
| 257 |
| 258 switch (std::fpclassify(value)) { |
| 259 case FP_NAN: { |
| 260 if (IsSignallingNaN(value)) { |
| 261 FPProcessException(); |
| 262 } |
| 263 if (DN()) return kFP16DefaultNaN; |
| 264 |
| 265 // Convert NaNs as the processor would: |
| 266 // - The sign is propagated. |
| 267 // - The mantissa is transferred as much as possible, except that the top |
| 268 // bit is forced to '1', making the result a quiet NaN. |
| 269 float16 result = |
| 270 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; |
| 271 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); |
| 272 result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN; |
| 273 return result; |
| 274 } |
| 275 |
| 276 case FP_ZERO: |
| 277 return (sign == 0) ? 0 : 0x8000; |
| 278 |
| 279 case FP_INFINITE: |
| 280 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; |
| 281 |
| 282 case FP_NORMAL: |
| 283 case FP_SUBNORMAL: { |
| 284 // Convert double-to-half as the processor would, assuming that FPCR.FZ |
| 285 // (flush-to-zero) is not set. |
| 286 |
| 287 // Add the implicit '1' bit to the mantissa. |
| 288 mantissa += (UINT64_C(1) << kDoubleMantissaBits); |
| 289 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); |
| 290 } |
| 291 } |
| 292 |
| 293 UNREACHABLE(); |
| 294 return kFP16DefaultNaN; |
| 295 } |
| 296 |
| 297 float Simulator::FPToFloat(double value, FPRounding round_mode) { |
| 298 // Only the FPTieEven rounding mode is implemented. |
| 299 DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); |
| 300 USE(round_mode); |
| 301 |
| 302 switch (std::fpclassify(value)) { |
| 303 case FP_NAN: { |
| 304 if (IsSignallingNaN(value)) { |
| 305 FPProcessException(); |
| 306 } |
| 307 if (DN()) return kFP32DefaultNaN; |
| 308 |
| 309 // Convert NaNs as the processor would: |
| 310 // - The sign is propagated. |
| 311 // - The mantissa is transferred as much as possible, except that the |
| 312 // top bit is forced to '1', making the result a quiet NaN. |
| 313 |
| 314 uint64_t raw = bit_cast<uint64_t>(value); |
| 315 |
| 316 uint32_t sign = raw >> 63; |
| 317 uint32_t exponent = (1 << 8) - 1; |
| 318 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64( |
| 319 50, kDoubleMantissaBits - kFloatMantissaBits, raw)); |
| 320 mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN. |
| 321 |
| 322 return float_pack(sign, exponent, mantissa); |
| 323 } |
| 324 |
| 325 case FP_ZERO: |
| 326 case FP_INFINITE: { |
| 327 // In a C++ cast, any value representable in the target type will be |
| 328 // unchanged. This is always the case for +/-0.0 and infinities. |
| 329 return static_cast<float>(value); |
| 330 } |
| 331 |
| 332 case FP_NORMAL: |
| 333 case FP_SUBNORMAL: { |
| 334 // Convert double-to-float as the processor would, assuming that FPCR.FZ |
| 335 // (flush-to-zero) is not set. |
| 336 uint32_t sign = double_sign(value); |
| 337 int64_t exponent = |
| 338 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias; |
| 339 uint64_t mantissa = double_mantissa(value); |
| 340 if (std::fpclassify(value) == FP_NORMAL) { |
| 341 // For normal FP values, add the hidden bit. |
| 342 mantissa |= (UINT64_C(1) << kDoubleMantissaBits); |
| 343 } |
| 344 return FPRoundToFloat(sign, exponent, mantissa, round_mode); |
| 345 } |
| 346 } |
| 347 |
| 348 UNREACHABLE(); |
| 349 return kFP32DefaultNaN; |
| 350 } |
| 351 |
| 352 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { |
| 353 dst.ClearForWrite(vform); |
| 354 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 355 dst.ReadUintFromMem(vform, i, addr); |
| 356 addr += LaneSizeInBytesFromFormat(vform); |
| 357 } |
| 358 } |
| 359 |
| 360 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index, |
| 361 uint64_t addr) { |
| 362 dst.ReadUintFromMem(vform, index, addr); |
| 363 } |
| 364 |
| 365 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { |
| 366 dst.ClearForWrite(vform); |
| 367 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 368 dst.ReadUintFromMem(vform, i, addr); |
| 369 } |
| 370 } |
| 371 |
| 372 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, |
| 373 LogicVRegister dst2, uint64_t addr1) { |
| 374 dst1.ClearForWrite(vform); |
| 375 dst2.ClearForWrite(vform); |
| 376 int esize = LaneSizeInBytesFromFormat(vform); |
| 377 uint64_t addr2 = addr1 + esize; |
| 378 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 379 dst1.ReadUintFromMem(vform, i, addr1); |
| 380 dst2.ReadUintFromMem(vform, i, addr2); |
| 381 addr1 += 2 * esize; |
| 382 addr2 += 2 * esize; |
| 383 } |
| 384 } |
| 385 |
| 386 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1, |
| 387 LogicVRegister dst2, int index, uint64_t addr1) { |
| 388 dst1.ClearForWrite(vform); |
| 389 dst2.ClearForWrite(vform); |
| 390 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); |
| 391 dst1.ReadUintFromMem(vform, index, addr1); |
| 392 dst2.ReadUintFromMem(vform, index, addr2); |
| 393 } |
| 394 |
| 395 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1, |
| 396 LogicVRegister dst2, uint64_t addr) { |
| 397 dst1.ClearForWrite(vform); |
| 398 dst2.ClearForWrite(vform); |
| 399 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); |
| 400 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 401 dst1.ReadUintFromMem(vform, i, addr); |
| 402 dst2.ReadUintFromMem(vform, i, addr2); |
| 403 } |
| 404 } |
| 405 |
| 406 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, |
| 407 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) { |
| 408 dst1.ClearForWrite(vform); |
| 409 dst2.ClearForWrite(vform); |
| 410 dst3.ClearForWrite(vform); |
| 411 int esize = LaneSizeInBytesFromFormat(vform); |
| 412 uint64_t addr2 = addr1 + esize; |
| 413 uint64_t addr3 = addr2 + esize; |
| 414 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 415 dst1.ReadUintFromMem(vform, i, addr1); |
| 416 dst2.ReadUintFromMem(vform, i, addr2); |
| 417 dst3.ReadUintFromMem(vform, i, addr3); |
| 418 addr1 += 3 * esize; |
| 419 addr2 += 3 * esize; |
| 420 addr3 += 3 * esize; |
| 421 } |
| 422 } |
| 423 |
| 424 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1, |
| 425 LogicVRegister dst2, LogicVRegister dst3, int index, |
| 426 uint64_t addr1) { |
| 427 dst1.ClearForWrite(vform); |
| 428 dst2.ClearForWrite(vform); |
| 429 dst3.ClearForWrite(vform); |
| 430 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); |
| 431 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); |
| 432 dst1.ReadUintFromMem(vform, index, addr1); |
| 433 dst2.ReadUintFromMem(vform, index, addr2); |
| 434 dst3.ReadUintFromMem(vform, index, addr3); |
| 435 } |
| 436 |
| 437 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1, |
| 438 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) { |
| 439 dst1.ClearForWrite(vform); |
| 440 dst2.ClearForWrite(vform); |
| 441 dst3.ClearForWrite(vform); |
| 442 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); |
| 443 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); |
| 444 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 445 dst1.ReadUintFromMem(vform, i, addr); |
| 446 dst2.ReadUintFromMem(vform, i, addr2); |
| 447 dst3.ReadUintFromMem(vform, i, addr3); |
| 448 } |
| 449 } |
| 450 |
| 451 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, |
| 452 LogicVRegister dst2, LogicVRegister dst3, |
| 453 LogicVRegister dst4, uint64_t addr1) { |
| 454 dst1.ClearForWrite(vform); |
| 455 dst2.ClearForWrite(vform); |
| 456 dst3.ClearForWrite(vform); |
| 457 dst4.ClearForWrite(vform); |
| 458 int esize = LaneSizeInBytesFromFormat(vform); |
| 459 uint64_t addr2 = addr1 + esize; |
| 460 uint64_t addr3 = addr2 + esize; |
| 461 uint64_t addr4 = addr3 + esize; |
| 462 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 463 dst1.ReadUintFromMem(vform, i, addr1); |
| 464 dst2.ReadUintFromMem(vform, i, addr2); |
| 465 dst3.ReadUintFromMem(vform, i, addr3); |
| 466 dst4.ReadUintFromMem(vform, i, addr4); |
| 467 addr1 += 4 * esize; |
| 468 addr2 += 4 * esize; |
| 469 addr3 += 4 * esize; |
| 470 addr4 += 4 * esize; |
| 471 } |
| 472 } |
| 473 |
| 474 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1, |
| 475 LogicVRegister dst2, LogicVRegister dst3, |
| 476 LogicVRegister dst4, int index, uint64_t addr1) { |
| 477 dst1.ClearForWrite(vform); |
| 478 dst2.ClearForWrite(vform); |
| 479 dst3.ClearForWrite(vform); |
| 480 dst4.ClearForWrite(vform); |
| 481 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); |
| 482 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); |
| 483 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); |
| 484 dst1.ReadUintFromMem(vform, index, addr1); |
| 485 dst2.ReadUintFromMem(vform, index, addr2); |
| 486 dst3.ReadUintFromMem(vform, index, addr3); |
| 487 dst4.ReadUintFromMem(vform, index, addr4); |
| 488 } |
| 489 |
| 490 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1, |
| 491 LogicVRegister dst2, LogicVRegister dst3, |
| 492 LogicVRegister dst4, uint64_t addr) { |
| 493 dst1.ClearForWrite(vform); |
| 494 dst2.ClearForWrite(vform); |
| 495 dst3.ClearForWrite(vform); |
| 496 dst4.ClearForWrite(vform); |
| 497 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); |
| 498 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); |
| 499 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); |
| 500 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 501 dst1.ReadUintFromMem(vform, i, addr); |
| 502 dst2.ReadUintFromMem(vform, i, addr2); |
| 503 dst3.ReadUintFromMem(vform, i, addr3); |
| 504 dst4.ReadUintFromMem(vform, i, addr4); |
| 505 } |
| 506 } |
| 507 |
| 508 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { |
| 509 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 510 src.WriteUintToMem(vform, i, addr); |
| 511 addr += LaneSizeInBytesFromFormat(vform); |
| 512 } |
| 513 } |
| 514 |
| 515 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index, |
| 516 uint64_t addr) { |
| 517 src.WriteUintToMem(vform, index, addr); |
| 518 } |
| 519 |
| 520 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, |
| 521 uint64_t addr) { |
| 522 int esize = LaneSizeInBytesFromFormat(vform); |
| 523 uint64_t addr2 = addr + esize; |
| 524 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 525 dst.WriteUintToMem(vform, i, addr); |
| 526 dst2.WriteUintToMem(vform, i, addr2); |
| 527 addr += 2 * esize; |
| 528 addr2 += 2 * esize; |
| 529 } |
| 530 } |
| 531 |
| 532 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, |
| 533 int index, uint64_t addr) { |
| 534 int esize = LaneSizeInBytesFromFormat(vform); |
| 535 dst.WriteUintToMem(vform, index, addr); |
| 536 dst2.WriteUintToMem(vform, index, addr + 1 * esize); |
| 537 } |
| 538 |
| 539 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, |
| 540 LogicVRegister dst3, uint64_t addr) { |
| 541 int esize = LaneSizeInBytesFromFormat(vform); |
| 542 uint64_t addr2 = addr + esize; |
| 543 uint64_t addr3 = addr2 + esize; |
| 544 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 545 dst.WriteUintToMem(vform, i, addr); |
| 546 dst2.WriteUintToMem(vform, i, addr2); |
| 547 dst3.WriteUintToMem(vform, i, addr3); |
| 548 addr += 3 * esize; |
| 549 addr2 += 3 * esize; |
| 550 addr3 += 3 * esize; |
| 551 } |
| 552 } |
| 553 |
| 554 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, |
| 555 LogicVRegister dst3, int index, uint64_t addr) { |
| 556 int esize = LaneSizeInBytesFromFormat(vform); |
| 557 dst.WriteUintToMem(vform, index, addr); |
| 558 dst2.WriteUintToMem(vform, index, addr + 1 * esize); |
| 559 dst3.WriteUintToMem(vform, index, addr + 2 * esize); |
| 560 } |
| 561 |
| 562 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, |
| 563 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) { |
| 564 int esize = LaneSizeInBytesFromFormat(vform); |
| 565 uint64_t addr2 = addr + esize; |
| 566 uint64_t addr3 = addr2 + esize; |
| 567 uint64_t addr4 = addr3 + esize; |
| 568 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 569 dst.WriteUintToMem(vform, i, addr); |
| 570 dst2.WriteUintToMem(vform, i, addr2); |
| 571 dst3.WriteUintToMem(vform, i, addr3); |
| 572 dst4.WriteUintToMem(vform, i, addr4); |
| 573 addr += 4 * esize; |
| 574 addr2 += 4 * esize; |
| 575 addr3 += 4 * esize; |
| 576 addr4 += 4 * esize; |
| 577 } |
| 578 } |
| 579 |
| 580 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2, |
| 581 LogicVRegister dst3, LogicVRegister dst4, int index, |
| 582 uint64_t addr) { |
| 583 int esize = LaneSizeInBytesFromFormat(vform); |
| 584 dst.WriteUintToMem(vform, index, addr); |
| 585 dst2.WriteUintToMem(vform, index, addr + 1 * esize); |
| 586 dst3.WriteUintToMem(vform, index, addr + 2 * esize); |
| 587 dst4.WriteUintToMem(vform, index, addr + 3 * esize); |
| 588 } |
| 589 |
| 590 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, |
| 591 const LogicVRegister& src1, |
| 592 const LogicVRegister& src2, Condition cond) { |
| 593 dst.ClearForWrite(vform); |
| 594 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 595 int64_t sa = src1.Int(vform, i); |
| 596 int64_t sb = src2.Int(vform, i); |
| 597 uint64_t ua = src1.Uint(vform, i); |
| 598 uint64_t ub = src2.Uint(vform, i); |
| 599 bool result = false; |
| 600 switch (cond) { |
| 601 case eq: |
| 602 result = (ua == ub); |
| 603 break; |
| 604 case ge: |
| 605 result = (sa >= sb); |
| 606 break; |
| 607 case gt: |
| 608 result = (sa > sb); |
| 609 break; |
| 610 case hi: |
| 611 result = (ua > ub); |
| 612 break; |
| 613 case hs: |
| 614 result = (ua >= ub); |
| 615 break; |
| 616 case lt: |
| 617 result = (sa < sb); |
| 618 break; |
| 619 case le: |
| 620 result = (sa <= sb); |
| 621 break; |
| 622 default: |
| 623 UNREACHABLE(); |
| 624 break; |
| 625 } |
| 626 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); |
| 627 } |
| 628 return dst; |
| 629 } |
| 630 |
| 631 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst, |
| 632 const LogicVRegister& src1, int imm, |
| 633 Condition cond) { |
| 634 SimVRegister temp; |
| 635 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); |
| 636 return cmp(vform, dst, src1, imm_reg, cond); |
| 637 } |
| 638 |
| 639 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst, |
| 640 const LogicVRegister& src1, |
| 641 const LogicVRegister& src2) { |
| 642 dst.ClearForWrite(vform); |
| 643 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 644 uint64_t ua = src1.Uint(vform, i); |
| 645 uint64_t ub = src2.Uint(vform, i); |
| 646 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); |
| 647 } |
| 648 return dst; |
| 649 } |
| 650 |
| 651 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst, |
| 652 const LogicVRegister& src1, |
| 653 const LogicVRegister& src2) { |
| 654 int lane_size = LaneSizeInBitsFromFormat(vform); |
| 655 dst.ClearForWrite(vform); |
| 656 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 657 // Test for unsigned saturation. |
| 658 uint64_t ua = src1.UintLeftJustified(vform, i); |
| 659 uint64_t ub = src2.UintLeftJustified(vform, i); |
| 660 uint64_t ur = ua + ub; |
| 661 if (ur < ua) { |
| 662 dst.SetUnsignedSat(i, true); |
| 663 } |
| 664 |
| 665 // Test for signed saturation. |
| 666 bool pos_a = (ua >> 63) == 0; |
| 667 bool pos_b = (ub >> 63) == 0; |
| 668 bool pos_r = (ur >> 63) == 0; |
| 669 // If the signs of the operands are the same, but different from the result, |
| 670 // there was an overflow. |
| 671 if ((pos_a == pos_b) && (pos_a != pos_r)) { |
| 672 dst.SetSignedSat(i, pos_a); |
| 673 } |
| 674 |
| 675 dst.SetInt(vform, i, ur >> (64 - lane_size)); |
| 676 } |
| 677 return dst; |
| 678 } |
| 679 |
| 680 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, |
| 681 const LogicVRegister& src1, |
| 682 const LogicVRegister& src2) { |
| 683 SimVRegister temp1, temp2; |
| 684 uzp1(vform, temp1, src1, src2); |
| 685 uzp2(vform, temp2, src1, src2); |
| 686 add(vform, dst, temp1, temp2); |
| 687 return dst; |
| 688 } |
| 689 |
| 690 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, |
| 691 const LogicVRegister& src1, |
| 692 const LogicVRegister& src2) { |
| 693 SimVRegister temp; |
| 694 mul(vform, temp, src1, src2); |
| 695 add(vform, dst, dst, temp); |
| 696 return dst; |
| 697 } |
| 698 |
| 699 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, |
| 700 const LogicVRegister& src1, |
| 701 const LogicVRegister& src2) { |
| 702 SimVRegister temp; |
| 703 mul(vform, temp, src1, src2); |
| 704 sub(vform, dst, dst, temp); |
| 705 return dst; |
| 706 } |
| 707 |
| 708 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, |
| 709 const LogicVRegister& src1, |
| 710 const LogicVRegister& src2) { |
| 711 dst.ClearForWrite(vform); |
| 712 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 713 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); |
| 714 } |
| 715 return dst; |
| 716 } |
| 717 |
| 718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst, |
| 719 const LogicVRegister& src1, |
| 720 const LogicVRegister& src2, int index) { |
| 721 SimVRegister temp; |
| 722 VectorFormat indexform = VectorFormatFillQ(vform); |
| 723 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 724 } |
| 725 |
| 726 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst, |
| 727 const LogicVRegister& src1, |
| 728 const LogicVRegister& src2, int index) { |
| 729 SimVRegister temp; |
| 730 VectorFormat indexform = VectorFormatFillQ(vform); |
| 731 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 732 } |
| 733 |
| 734 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst, |
| 735 const LogicVRegister& src1, |
| 736 const LogicVRegister& src2, int index) { |
| 737 SimVRegister temp; |
| 738 VectorFormat indexform = VectorFormatFillQ(vform); |
| 739 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 740 } |
| 741 |
| 742 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, |
| 743 const LogicVRegister& src1, |
| 744 const LogicVRegister& src2, int index) { |
| 745 SimVRegister temp; |
| 746 VectorFormat indexform = |
| 747 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 748 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 749 } |
| 750 |
| 751 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, |
| 752 const LogicVRegister& src1, |
| 753 const LogicVRegister& src2, int index) { |
| 754 SimVRegister temp; |
| 755 VectorFormat indexform = |
| 756 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 757 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 758 } |
| 759 |
| 760 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, |
| 761 const LogicVRegister& src1, |
| 762 const LogicVRegister& src2, int index) { |
| 763 SimVRegister temp; |
| 764 VectorFormat indexform = |
| 765 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 766 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 767 } |
| 768 |
| 769 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, |
| 770 const LogicVRegister& src1, |
| 771 const LogicVRegister& src2, int index) { |
| 772 SimVRegister temp; |
| 773 VectorFormat indexform = |
| 774 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 775 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 776 } |
| 777 |
| 778 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, |
| 779 const LogicVRegister& src1, |
| 780 const LogicVRegister& src2, int index) { |
| 781 SimVRegister temp; |
| 782 VectorFormat indexform = |
| 783 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 784 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 785 } |
| 786 |
| 787 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, |
| 788 const LogicVRegister& src1, |
| 789 const LogicVRegister& src2, int index) { |
| 790 SimVRegister temp; |
| 791 VectorFormat indexform = |
| 792 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 793 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 794 } |
| 795 |
| 796 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, |
| 797 const LogicVRegister& src1, |
| 798 const LogicVRegister& src2, int index) { |
| 799 SimVRegister temp; |
| 800 VectorFormat indexform = |
| 801 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 802 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 803 } |
| 804 |
| 805 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, |
| 806 const LogicVRegister& src1, |
| 807 const LogicVRegister& src2, int index) { |
| 808 SimVRegister temp; |
| 809 VectorFormat indexform = |
| 810 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 811 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 812 } |
| 813 |
| 814 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, |
| 815 const LogicVRegister& src1, |
| 816 const LogicVRegister& src2, int index) { |
| 817 SimVRegister temp; |
| 818 VectorFormat indexform = |
| 819 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 820 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 821 } |
| 822 |
| 823 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, |
| 824 const LogicVRegister& src1, |
| 825 const LogicVRegister& src2, int index) { |
| 826 SimVRegister temp; |
| 827 VectorFormat indexform = |
| 828 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 829 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 830 } |
| 831 |
| 832 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, |
| 833 const LogicVRegister& src1, |
| 834 const LogicVRegister& src2, int index) { |
| 835 SimVRegister temp; |
| 836 VectorFormat indexform = |
| 837 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 838 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 839 } |
| 840 |
| 841 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, |
| 842 const LogicVRegister& src1, |
| 843 const LogicVRegister& src2, int index) { |
| 844 SimVRegister temp; |
| 845 VectorFormat indexform = |
| 846 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 847 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 848 } |
| 849 |
| 850 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, |
| 851 const LogicVRegister& src1, |
| 852 const LogicVRegister& src2, int index) { |
| 853 SimVRegister temp; |
| 854 VectorFormat indexform = |
| 855 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 856 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 857 } |
| 858 |
| 859 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, |
| 860 const LogicVRegister& src1, |
| 861 const LogicVRegister& src2, int index) { |
| 862 SimVRegister temp; |
| 863 VectorFormat indexform = |
| 864 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 865 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 866 } |
| 867 |
| 868 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, |
| 869 const LogicVRegister& src1, |
| 870 const LogicVRegister& src2, int index) { |
| 871 SimVRegister temp; |
| 872 VectorFormat indexform = |
| 873 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 874 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 875 } |
| 876 |
| 877 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, |
| 878 const LogicVRegister& src1, |
| 879 const LogicVRegister& src2, int index) { |
| 880 SimVRegister temp; |
| 881 VectorFormat indexform = |
| 882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 883 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 884 } |
| 885 |
| 886 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, |
| 887 const LogicVRegister& src1, |
| 888 const LogicVRegister& src2, int index) { |
| 889 SimVRegister temp; |
| 890 VectorFormat indexform = |
| 891 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 892 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 893 } |
| 894 |
| 895 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, |
| 896 const LogicVRegister& src1, |
| 897 const LogicVRegister& src2, int index) { |
| 898 SimVRegister temp; |
| 899 VectorFormat indexform = |
| 900 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); |
| 901 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 902 } |
| 903 |
| 904 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, |
| 905 const LogicVRegister& src1, |
| 906 const LogicVRegister& src2, int index) { |
| 907 SimVRegister temp; |
| 908 VectorFormat indexform = VectorFormatFillQ(vform); |
| 909 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 910 } |
| 911 |
| 912 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, |
| 913 const LogicVRegister& src1, |
| 914 const LogicVRegister& src2, int index) { |
| 915 SimVRegister temp; |
| 916 VectorFormat indexform = VectorFormatFillQ(vform); |
| 917 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); |
| 918 } |
| 919 |
| 920 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { |
| 921 uint16_t result = 0; |
| 922 uint16_t extended_op2 = op2; |
| 923 for (int i = 0; i < 8; ++i) { |
| 924 if ((op1 >> i) & 1) { |
| 925 result = result ^ (extended_op2 << i); |
| 926 } |
| 927 } |
| 928 return result; |
| 929 } |
| 930 |
| 931 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst, |
| 932 const LogicVRegister& src1, |
| 933 const LogicVRegister& src2) { |
| 934 dst.ClearForWrite(vform); |
| 935 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 936 dst.SetUint(vform, i, |
| 937 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); |
| 938 } |
| 939 return dst; |
| 940 } |
| 941 |
| 942 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst, |
| 943 const LogicVRegister& src1, |
| 944 const LogicVRegister& src2) { |
| 945 VectorFormat vform_src = VectorFormatHalfWidth(vform); |
| 946 dst.ClearForWrite(vform); |
| 947 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 948 dst.SetUint( |
| 949 vform, i, |
| 950 PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i))); |
| 951 } |
| 952 return dst; |
| 953 } |
| 954 |
| 955 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst, |
| 956 const LogicVRegister& src1, |
| 957 const LogicVRegister& src2) { |
| 958 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); |
| 959 dst.ClearForWrite(vform); |
| 960 int lane_count = LaneCountFromFormat(vform); |
| 961 for (int i = 0; i < lane_count; i++) { |
| 962 dst.SetUint(vform, i, |
| 963 PolynomialMult(src1.Uint(vform_src, lane_count + i), |
| 964 src2.Uint(vform_src, lane_count + i))); |
| 965 } |
| 966 return dst; |
| 967 } |
| 968 |
| 969 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst, |
| 970 const LogicVRegister& src1, |
| 971 const LogicVRegister& src2) { |
| 972 int lane_size = LaneSizeInBitsFromFormat(vform); |
| 973 dst.ClearForWrite(vform); |
| 974 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 975 // Test for unsigned saturation. |
| 976 uint64_t ua = src1.UintLeftJustified(vform, i); |
| 977 uint64_t ub = src2.UintLeftJustified(vform, i); |
| 978 uint64_t ur = ua - ub; |
| 979 if (ub > ua) { |
| 980 dst.SetUnsignedSat(i, false); |
| 981 } |
| 982 |
| 983 // Test for signed saturation. |
| 984 bool pos_a = (ua >> 63) == 0; |
| 985 bool pos_b = (ub >> 63) == 0; |
| 986 bool pos_r = (ur >> 63) == 0; |
| 987 // If the signs of the operands are different, and the sign of the first |
| 988 // operand doesn't match the result, there was an overflow. |
| 989 if ((pos_a != pos_b) && (pos_a != pos_r)) { |
| 990 dst.SetSignedSat(i, pos_a); |
| 991 } |
| 992 |
| 993 dst.SetInt(vform, i, ur >> (64 - lane_size)); |
| 994 } |
| 995 return dst; |
| 996 } |
| 997 |
| 998 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst, |
| 999 const LogicVRegister& src1, |
| 1000 const LogicVRegister& src2) { |
| 1001 dst.ClearForWrite(vform); |
| 1002 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1003 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); |
| 1004 } |
| 1005 return dst; |
| 1006 } |
| 1007 |
| 1008 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, |
| 1009 const LogicVRegister& src1, |
| 1010 const LogicVRegister& src2) { |
| 1011 dst.ClearForWrite(vform); |
| 1012 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1013 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); |
| 1014 } |
| 1015 return dst; |
| 1016 } |
| 1017 |
| 1018 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst, |
| 1019 const LogicVRegister& src1, |
| 1020 const LogicVRegister& src2) { |
| 1021 dst.ClearForWrite(vform); |
| 1022 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1023 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); |
| 1024 } |
| 1025 return dst; |
| 1026 } |
| 1027 |
| 1028 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst, |
| 1029 const LogicVRegister& src1, |
| 1030 const LogicVRegister& src2) { |
| 1031 dst.ClearForWrite(vform); |
| 1032 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1033 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); |
| 1034 } |
| 1035 return dst; |
| 1036 } |
| 1037 |
| 1038 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, |
| 1039 const LogicVRegister& src1, |
| 1040 const LogicVRegister& src2) { |
| 1041 dst.ClearForWrite(vform); |
| 1042 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1043 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); |
| 1044 } |
| 1045 return dst; |
| 1046 } |
| 1047 |
| 1048 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst, |
| 1049 const LogicVRegister& src, uint64_t imm) { |
| 1050 uint64_t result[16]; |
| 1051 int laneCount = LaneCountFromFormat(vform); |
| 1052 for (int i = 0; i < laneCount; ++i) { |
| 1053 result[i] = src.Uint(vform, i) & ~imm; |
| 1054 } |
| 1055 dst.SetUintArray(vform, result); |
| 1056 return dst; |
| 1057 } |
| 1058 |
| 1059 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst, |
| 1060 const LogicVRegister& src1, |
| 1061 const LogicVRegister& src2) { |
| 1062 dst.ClearForWrite(vform); |
| 1063 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1064 uint64_t operand1 = dst.Uint(vform, i); |
| 1065 uint64_t operand2 = ~src2.Uint(vform, i); |
| 1066 uint64_t operand3 = src1.Uint(vform, i); |
| 1067 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); |
| 1068 dst.SetUint(vform, i, result); |
| 1069 } |
| 1070 return dst; |
| 1071 } |
| 1072 |
| 1073 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst, |
| 1074 const LogicVRegister& src1, |
| 1075 const LogicVRegister& src2) { |
| 1076 dst.ClearForWrite(vform); |
| 1077 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1078 uint64_t operand1 = dst.Uint(vform, i); |
| 1079 uint64_t operand2 = src2.Uint(vform, i); |
| 1080 uint64_t operand3 = src1.Uint(vform, i); |
| 1081 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); |
| 1082 dst.SetUint(vform, i, result); |
| 1083 } |
| 1084 return dst; |
| 1085 } |
| 1086 |
| 1087 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst, |
| 1088 const LogicVRegister& src1, |
| 1089 const LogicVRegister& src2) { |
| 1090 dst.ClearForWrite(vform); |
| 1091 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1092 uint64_t operand1 = src2.Uint(vform, i); |
| 1093 uint64_t operand2 = dst.Uint(vform, i); |
| 1094 uint64_t operand3 = src1.Uint(vform, i); |
| 1095 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); |
| 1096 dst.SetUint(vform, i, result); |
| 1097 } |
| 1098 return dst; |
| 1099 } |
| 1100 |
| 1101 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst, |
| 1102 const LogicVRegister& src1, |
| 1103 const LogicVRegister& src2, bool max) { |
| 1104 dst.ClearForWrite(vform); |
| 1105 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1106 int64_t src1_val = src1.Int(vform, i); |
| 1107 int64_t src2_val = src2.Int(vform, i); |
| 1108 int64_t dst_val; |
| 1109 if (max) { |
| 1110 dst_val = (src1_val > src2_val) ? src1_val : src2_val; |
| 1111 } else { |
| 1112 dst_val = (src1_val < src2_val) ? src1_val : src2_val; |
| 1113 } |
| 1114 dst.SetInt(vform, i, dst_val); |
| 1115 } |
| 1116 return dst; |
| 1117 } |
| 1118 |
| 1119 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst, |
| 1120 const LogicVRegister& src1, |
| 1121 const LogicVRegister& src2) { |
| 1122 return SMinMax(vform, dst, src1, src2, true); |
| 1123 } |
| 1124 |
| 1125 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst, |
| 1126 const LogicVRegister& src1, |
| 1127 const LogicVRegister& src2) { |
| 1128 return SMinMax(vform, dst, src1, src2, false); |
| 1129 } |
| 1130 |
| 1131 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst, |
| 1132 const LogicVRegister& src1, |
| 1133 const LogicVRegister& src2, bool max) { |
| 1134 int lanes = LaneCountFromFormat(vform); |
| 1135 int64_t result[kMaxLanesPerVector]; |
| 1136 const LogicVRegister* src = &src1; |
| 1137 for (int j = 0; j < 2; j++) { |
| 1138 for (int i = 0; i < lanes; i += 2) { |
| 1139 int64_t first_val = src->Int(vform, i); |
| 1140 int64_t second_val = src->Int(vform, i + 1); |
| 1141 int64_t dst_val; |
| 1142 if (max) { |
| 1143 dst_val = (first_val > second_val) ? first_val : second_val; |
| 1144 } else { |
| 1145 dst_val = (first_val < second_val) ? first_val : second_val; |
| 1146 } |
| 1147 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); |
| 1148 result[(i >> 1) + (j * lanes / 2)] = dst_val; |
| 1149 } |
| 1150 src = &src2; |
| 1151 } |
| 1152 dst.SetIntArray(vform, result); |
| 1153 return dst; |
| 1154 } |
| 1155 |
| 1156 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst, |
| 1157 const LogicVRegister& src1, |
| 1158 const LogicVRegister& src2) { |
| 1159 return SMinMaxP(vform, dst, src1, src2, true); |
| 1160 } |
| 1161 |
| 1162 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst, |
| 1163 const LogicVRegister& src1, |
| 1164 const LogicVRegister& src2) { |
| 1165 return SMinMaxP(vform, dst, src1, src2, false); |
| 1166 } |
| 1167 |
| 1168 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, |
| 1169 const LogicVRegister& src) { |
| 1170 DCHECK_EQ(vform, kFormatD); |
| 1171 |
| 1172 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); |
| 1173 dst.ClearForWrite(vform); |
| 1174 dst.SetUint(vform, 0, dst_val); |
| 1175 return dst; |
| 1176 } |
| 1177 |
| 1178 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst, |
| 1179 const LogicVRegister& src) { |
| 1180 VectorFormat vform_dst = |
| 1181 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); |
| 1182 |
| 1183 int64_t dst_val = 0; |
| 1184 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1185 dst_val += src.Int(vform, i); |
| 1186 } |
| 1187 |
| 1188 dst.ClearForWrite(vform_dst); |
| 1189 dst.SetInt(vform_dst, 0, dst_val); |
| 1190 return dst; |
| 1191 } |
| 1192 |
| 1193 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst, |
| 1194 const LogicVRegister& src) { |
| 1195 VectorFormat vform_dst = |
| 1196 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); |
| 1197 |
| 1198 int64_t dst_val = 0; |
| 1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1200 dst_val += src.Int(vform, i); |
| 1201 } |
| 1202 |
| 1203 dst.ClearForWrite(vform_dst); |
| 1204 dst.SetInt(vform_dst, 0, dst_val); |
| 1205 return dst; |
| 1206 } |
| 1207 |
| 1208 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst, |
| 1209 const LogicVRegister& src) { |
| 1210 VectorFormat vform_dst = |
| 1211 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); |
| 1212 |
| 1213 uint64_t dst_val = 0; |
| 1214 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1215 dst_val += src.Uint(vform, i); |
| 1216 } |
| 1217 |
| 1218 dst.ClearForWrite(vform_dst); |
| 1219 dst.SetUint(vform_dst, 0, dst_val); |
| 1220 return dst; |
| 1221 } |
| 1222 |
| 1223 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst, |
| 1224 const LogicVRegister& src, bool max) { |
| 1225 int64_t dst_val = max ? INT64_MIN : INT64_MAX; |
| 1226 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1227 int64_t src_val = src.Int(vform, i); |
| 1228 if (max) { |
| 1229 dst_val = (src_val > dst_val) ? src_val : dst_val; |
| 1230 } else { |
| 1231 dst_val = (src_val < dst_val) ? src_val : dst_val; |
| 1232 } |
| 1233 } |
| 1234 dst.ClearForWrite(ScalarFormatFromFormat(vform)); |
| 1235 dst.SetInt(vform, 0, dst_val); |
| 1236 return dst; |
| 1237 } |
| 1238 |
| 1239 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, |
| 1240 const LogicVRegister& src) { |
| 1241 SMinMaxV(vform, dst, src, true); |
| 1242 return dst; |
| 1243 } |
| 1244 |
| 1245 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, |
| 1246 const LogicVRegister& src) { |
| 1247 SMinMaxV(vform, dst, src, false); |
| 1248 return dst; |
| 1249 } |
| 1250 |
| 1251 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst, |
| 1252 const LogicVRegister& src1, |
| 1253 const LogicVRegister& src2, bool max) { |
| 1254 dst.ClearForWrite(vform); |
| 1255 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1256 uint64_t src1_val = src1.Uint(vform, i); |
| 1257 uint64_t src2_val = src2.Uint(vform, i); |
| 1258 uint64_t dst_val; |
| 1259 if (max) { |
| 1260 dst_val = (src1_val > src2_val) ? src1_val : src2_val; |
| 1261 } else { |
| 1262 dst_val = (src1_val < src2_val) ? src1_val : src2_val; |
| 1263 } |
| 1264 dst.SetUint(vform, i, dst_val); |
| 1265 } |
| 1266 return dst; |
| 1267 } |
| 1268 |
| 1269 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst, |
| 1270 const LogicVRegister& src1, |
| 1271 const LogicVRegister& src2) { |
| 1272 return UMinMax(vform, dst, src1, src2, true); |
| 1273 } |
| 1274 |
| 1275 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst, |
| 1276 const LogicVRegister& src1, |
| 1277 const LogicVRegister& src2) { |
| 1278 return UMinMax(vform, dst, src1, src2, false); |
| 1279 } |
| 1280 |
| 1281 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst, |
| 1282 const LogicVRegister& src1, |
| 1283 const LogicVRegister& src2, bool max) { |
| 1284 int lanes = LaneCountFromFormat(vform); |
| 1285 uint64_t result[kMaxLanesPerVector]; |
| 1286 const LogicVRegister* src = &src1; |
| 1287 for (int j = 0; j < 2; j++) { |
| 1288 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { |
| 1289 uint64_t first_val = src->Uint(vform, i); |
| 1290 uint64_t second_val = src->Uint(vform, i + 1); |
| 1291 uint64_t dst_val; |
| 1292 if (max) { |
| 1293 dst_val = (first_val > second_val) ? first_val : second_val; |
| 1294 } else { |
| 1295 dst_val = (first_val < second_val) ? first_val : second_val; |
| 1296 } |
| 1297 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); |
| 1298 result[(i >> 1) + (j * lanes / 2)] = dst_val; |
| 1299 } |
| 1300 src = &src2; |
| 1301 } |
| 1302 dst.SetUintArray(vform, result); |
| 1303 return dst; |
| 1304 } |
| 1305 |
| 1306 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst, |
| 1307 const LogicVRegister& src1, |
| 1308 const LogicVRegister& src2) { |
| 1309 return UMinMaxP(vform, dst, src1, src2, true); |
| 1310 } |
| 1311 |
| 1312 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst, |
| 1313 const LogicVRegister& src1, |
| 1314 const LogicVRegister& src2) { |
| 1315 return UMinMaxP(vform, dst, src1, src2, false); |
| 1316 } |
| 1317 |
| 1318 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst, |
| 1319 const LogicVRegister& src, bool max) { |
| 1320 uint64_t dst_val = max ? 0 : UINT64_MAX; |
| 1321 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1322 uint64_t src_val = src.Uint(vform, i); |
| 1323 if (max) { |
| 1324 dst_val = (src_val > dst_val) ? src_val : dst_val; |
| 1325 } else { |
| 1326 dst_val = (src_val < dst_val) ? src_val : dst_val; |
| 1327 } |
| 1328 } |
| 1329 dst.ClearForWrite(ScalarFormatFromFormat(vform)); |
| 1330 dst.SetUint(vform, 0, dst_val); |
| 1331 return dst; |
| 1332 } |
| 1333 |
| 1334 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, |
| 1335 const LogicVRegister& src) { |
| 1336 UMinMaxV(vform, dst, src, true); |
| 1337 return dst; |
| 1338 } |
| 1339 |
| 1340 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, |
| 1341 const LogicVRegister& src) { |
| 1342 UMinMaxV(vform, dst, src, false); |
| 1343 return dst; |
| 1344 } |
| 1345 |
| 1346 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst, |
| 1347 const LogicVRegister& src, int shift) { |
| 1348 DCHECK_GE(shift, 0); |
| 1349 SimVRegister temp; |
| 1350 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); |
| 1351 return ushl(vform, dst, src, shiftreg); |
| 1352 } |
| 1353 |
| 1354 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst, |
| 1355 const LogicVRegister& src, int shift) { |
| 1356 DCHECK_GE(shift, 0); |
| 1357 SimVRegister temp1, temp2; |
| 1358 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); |
| 1359 LogicVRegister extendedreg = sxtl(vform, temp2, src); |
| 1360 return sshl(vform, dst, extendedreg, shiftreg); |
| 1361 } |
| 1362 |
| 1363 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst, |
| 1364 const LogicVRegister& src, int shift) { |
| 1365 DCHECK_GE(shift, 0); |
| 1366 SimVRegister temp1, temp2; |
| 1367 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); |
| 1368 LogicVRegister extendedreg = sxtl2(vform, temp2, src); |
| 1369 return sshl(vform, dst, extendedreg, shiftreg); |
| 1370 } |
| 1371 |
| 1372 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst, |
| 1373 const LogicVRegister& src) { |
| 1374 int shift = LaneSizeInBitsFromFormat(vform) / 2; |
| 1375 return sshll(vform, dst, src, shift); |
| 1376 } |
| 1377 |
| 1378 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst, |
| 1379 const LogicVRegister& src) { |
| 1380 int shift = LaneSizeInBitsFromFormat(vform) / 2; |
| 1381 return sshll2(vform, dst, src, shift); |
| 1382 } |
| 1383 |
| 1384 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst, |
| 1385 const LogicVRegister& src, int shift) { |
| 1386 DCHECK_GE(shift, 0); |
| 1387 SimVRegister temp1, temp2; |
| 1388 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); |
| 1389 LogicVRegister extendedreg = uxtl(vform, temp2, src); |
| 1390 return ushl(vform, dst, extendedreg, shiftreg); |
| 1391 } |
| 1392 |
| 1393 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst, |
| 1394 const LogicVRegister& src, int shift) { |
| 1395 DCHECK_GE(shift, 0); |
| 1396 SimVRegister temp1, temp2; |
| 1397 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); |
| 1398 LogicVRegister extendedreg = uxtl2(vform, temp2, src); |
| 1399 return ushl(vform, dst, extendedreg, shiftreg); |
| 1400 } |
| 1401 |
| 1402 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, |
| 1403 const LogicVRegister& src, int shift) { |
| 1404 dst.ClearForWrite(vform); |
| 1405 int laneCount = LaneCountFromFormat(vform); |
| 1406 for (int i = 0; i < laneCount; i++) { |
| 1407 uint64_t src_lane = src.Uint(vform, i); |
| 1408 uint64_t dst_lane = dst.Uint(vform, i); |
| 1409 uint64_t shifted = src_lane << shift; |
| 1410 uint64_t mask = MaxUintFromFormat(vform) << shift; |
| 1411 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); |
| 1412 } |
| 1413 return dst; |
| 1414 } |
| 1415 |
| 1416 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst, |
| 1417 const LogicVRegister& src, int shift) { |
| 1418 DCHECK_GE(shift, 0); |
| 1419 SimVRegister temp; |
| 1420 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); |
| 1421 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); |
| 1422 } |
| 1423 |
| 1424 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst, |
| 1425 const LogicVRegister& src, int shift) { |
| 1426 DCHECK_GE(shift, 0); |
| 1427 SimVRegister temp; |
| 1428 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); |
| 1429 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); |
| 1430 } |
| 1431 |
| 1432 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst, |
| 1433 const LogicVRegister& src, int shift) { |
| 1434 DCHECK_GE(shift, 0); |
| 1435 SimVRegister temp; |
| 1436 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); |
| 1437 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); |
| 1438 } |
| 1439 |
| 1440 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst, |
| 1441 const LogicVRegister& src, int shift) { |
| 1442 dst.ClearForWrite(vform); |
| 1443 int laneCount = LaneCountFromFormat(vform); |
| 1444 DCHECK((shift > 0) && |
| 1445 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); |
| 1446 for (int i = 0; i < laneCount; i++) { |
| 1447 uint64_t src_lane = src.Uint(vform, i); |
| 1448 uint64_t dst_lane = dst.Uint(vform, i); |
| 1449 uint64_t shifted; |
| 1450 uint64_t mask; |
| 1451 if (shift == 64) { |
| 1452 shifted = 0; |
| 1453 mask = 0; |
| 1454 } else { |
| 1455 shifted = src_lane >> shift; |
| 1456 mask = MaxUintFromFormat(vform) >> shift; |
| 1457 } |
| 1458 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); |
| 1459 } |
| 1460 return dst; |
| 1461 } |
| 1462 |
| 1463 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst, |
| 1464 const LogicVRegister& src, int shift) { |
| 1465 DCHECK_GE(shift, 0); |
| 1466 SimVRegister temp; |
| 1467 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); |
| 1468 return ushl(vform, dst, src, shiftreg); |
| 1469 } |
| 1470 |
| 1471 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst, |
| 1472 const LogicVRegister& src, int shift) { |
| 1473 DCHECK_GE(shift, 0); |
| 1474 SimVRegister temp; |
| 1475 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); |
| 1476 return sshl(vform, dst, src, shiftreg); |
| 1477 } |
| 1478 |
| 1479 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst, |
| 1480 const LogicVRegister& src, int shift) { |
| 1481 SimVRegister temp; |
| 1482 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); |
| 1483 return add(vform, dst, dst, shifted_reg); |
| 1484 } |
| 1485 |
| 1486 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst, |
| 1487 const LogicVRegister& src, int shift) { |
| 1488 SimVRegister temp; |
| 1489 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); |
| 1490 return add(vform, dst, dst, shifted_reg); |
| 1491 } |
| 1492 |
| 1493 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst, |
| 1494 const LogicVRegister& src, int shift) { |
| 1495 SimVRegister temp; |
| 1496 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); |
| 1497 return add(vform, dst, dst, shifted_reg); |
| 1498 } |
| 1499 |
| 1500 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst, |
| 1501 const LogicVRegister& src, int shift) { |
| 1502 SimVRegister temp; |
| 1503 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); |
| 1504 return add(vform, dst, dst, shifted_reg); |
| 1505 } |
| 1506 |
| 1507 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, |
| 1508 const LogicVRegister& src) { |
| 1509 uint64_t result[16]; |
| 1510 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); |
| 1511 int laneCount = LaneCountFromFormat(vform); |
| 1512 for (int i = 0; i < laneCount; i++) { |
| 1513 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); |
| 1514 } |
| 1515 |
| 1516 dst.SetUintArray(vform, result); |
| 1517 return dst; |
| 1518 } |
| 1519 |
| 1520 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, |
| 1521 const LogicVRegister& src) { |
| 1522 uint64_t result[16]; |
| 1523 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); |
| 1524 int laneCount = LaneCountFromFormat(vform); |
| 1525 for (int i = 0; i < laneCount; i++) { |
| 1526 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); |
| 1527 } |
| 1528 |
| 1529 dst.SetUintArray(vform, result); |
| 1530 return dst; |
| 1531 } |
| 1532 |
| 1533 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, |
| 1534 const LogicVRegister& src) { |
| 1535 uint64_t result[16]; |
| 1536 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); |
| 1537 int laneCount = LaneCountFromFormat(vform); |
| 1538 for (int i = 0; i < laneCount; i++) { |
| 1539 uint64_t value = src.Uint(vform, i); |
| 1540 result[i] = 0; |
| 1541 for (int j = 0; j < laneSizeInBits; j++) { |
| 1542 result[i] += (value & 1); |
| 1543 value >>= 1; |
| 1544 } |
| 1545 } |
| 1546 |
| 1547 dst.SetUintArray(vform, result); |
| 1548 return dst; |
| 1549 } |
| 1550 |
| 1551 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst, |
| 1552 const LogicVRegister& src1, |
| 1553 const LogicVRegister& src2) { |
| 1554 dst.ClearForWrite(vform); |
| 1555 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1556 int8_t shift_val = src2.Int(vform, i); |
| 1557 int64_t lj_src_val = src1.IntLeftJustified(vform, i); |
| 1558 |
| 1559 // Set signed saturation state. |
| 1560 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) && |
| 1561 (lj_src_val != 0)) { |
| 1562 dst.SetSignedSat(i, lj_src_val >= 0); |
| 1563 } |
| 1564 |
| 1565 // Set unsigned saturation state. |
| 1566 if (lj_src_val < 0) { |
| 1567 dst.SetUnsignedSat(i, false); |
| 1568 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && |
| 1569 (lj_src_val != 0)) { |
| 1570 dst.SetUnsignedSat(i, true); |
| 1571 } |
| 1572 |
| 1573 int64_t src_val = src1.Int(vform, i); |
| 1574 bool src_is_negative = src_val < 0; |
| 1575 if (shift_val > 63) { |
| 1576 dst.SetInt(vform, i, 0); |
| 1577 } else if (shift_val < -63) { |
| 1578 dst.SetRounding(i, src_is_negative); |
| 1579 dst.SetInt(vform, i, src_is_negative ? -1 : 0); |
| 1580 } else { |
| 1581 // Use unsigned types for shifts, as behaviour is undefined for signed |
| 1582 // lhs. |
| 1583 uint64_t usrc_val = static_cast<uint64_t>(src_val); |
| 1584 |
| 1585 if (shift_val < 0) { |
| 1586 // Convert to right shift. |
| 1587 shift_val = -shift_val; |
| 1588 |
| 1589 // Set rounding state by testing most-significant bit shifted out. |
| 1590 // Rounding only needed on right shifts. |
| 1591 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { |
| 1592 dst.SetRounding(i, true); |
| 1593 } |
| 1594 |
| 1595 usrc_val >>= shift_val; |
| 1596 |
| 1597 if (src_is_negative) { |
| 1598 // Simulate sign-extension. |
| 1599 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); |
| 1600 } |
| 1601 } else { |
| 1602 usrc_val <<= shift_val; |
| 1603 } |
| 1604 dst.SetUint(vform, i, usrc_val); |
| 1605 } |
| 1606 } |
| 1607 return dst; |
| 1608 } |
| 1609 |
| 1610 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst, |
| 1611 const LogicVRegister& src1, |
| 1612 const LogicVRegister& src2) { |
| 1613 dst.ClearForWrite(vform); |
| 1614 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1615 int8_t shift_val = src2.Int(vform, i); |
| 1616 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); |
| 1617 |
| 1618 // Set saturation state. |
| 1619 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) { |
| 1620 dst.SetUnsignedSat(i, true); |
| 1621 } |
| 1622 |
| 1623 uint64_t src_val = src1.Uint(vform, i); |
| 1624 if ((shift_val > 63) || (shift_val < -64)) { |
| 1625 dst.SetUint(vform, i, 0); |
| 1626 } else { |
| 1627 if (shift_val < 0) { |
| 1628 // Set rounding state. Rounding only needed on right shifts. |
| 1629 if (((src_val >> (-shift_val - 1)) & 1) == 1) { |
| 1630 dst.SetRounding(i, true); |
| 1631 } |
| 1632 |
| 1633 if (shift_val == -64) { |
| 1634 src_val = 0; |
| 1635 } else { |
| 1636 src_val >>= -shift_val; |
| 1637 } |
| 1638 } else { |
| 1639 src_val <<= shift_val; |
| 1640 } |
| 1641 dst.SetUint(vform, i, src_val); |
| 1642 } |
| 1643 } |
| 1644 return dst; |
| 1645 } |
| 1646 |
| 1647 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst, |
| 1648 const LogicVRegister& src) { |
| 1649 dst.ClearForWrite(vform); |
| 1650 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1651 // Test for signed saturation. |
| 1652 int64_t sa = src.Int(vform, i); |
| 1653 if (sa == MinIntFromFormat(vform)) { |
| 1654 dst.SetSignedSat(i, true); |
| 1655 } |
| 1656 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); |
| 1657 } |
| 1658 return dst; |
| 1659 } |
| 1660 |
| 1661 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst, |
| 1662 const LogicVRegister& src) { |
| 1663 dst.ClearForWrite(vform); |
| 1664 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1665 int64_t sa = dst.IntLeftJustified(vform, i); |
| 1666 uint64_t ub = src.UintLeftJustified(vform, i); |
| 1667 uint64_t ur = sa + ub; |
| 1668 |
| 1669 int64_t sr = bit_cast<int64_t>(ur); |
| 1670 if (sr < sa) { // Test for signed positive saturation. |
| 1671 dst.SetInt(vform, i, MaxIntFromFormat(vform)); |
| 1672 } else { |
| 1673 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); |
| 1674 } |
| 1675 } |
| 1676 return dst; |
| 1677 } |
| 1678 |
| 1679 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst, |
| 1680 const LogicVRegister& src) { |
| 1681 dst.ClearForWrite(vform); |
| 1682 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1683 uint64_t ua = dst.UintLeftJustified(vform, i); |
| 1684 int64_t sb = src.IntLeftJustified(vform, i); |
| 1685 uint64_t ur = ua + sb; |
| 1686 |
| 1687 if ((sb > 0) && (ur <= ua)) { |
| 1688 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. |
| 1689 } else if ((sb < 0) && (ur >= ua)) { |
| 1690 dst.SetUint(vform, i, 0); // Negative saturation. |
| 1691 } else { |
| 1692 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); |
| 1693 } |
| 1694 } |
| 1695 return dst; |
| 1696 } |
| 1697 |
| 1698 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst, |
| 1699 const LogicVRegister& src) { |
| 1700 dst.ClearForWrite(vform); |
| 1701 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1702 // Test for signed saturation. |
| 1703 int64_t sa = src.Int(vform, i); |
| 1704 if (sa == MinIntFromFormat(vform)) { |
| 1705 dst.SetSignedSat(i, true); |
| 1706 } |
| 1707 if (sa < 0) { |
| 1708 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); |
| 1709 } else { |
| 1710 dst.SetInt(vform, i, sa); |
| 1711 } |
| 1712 } |
| 1713 return dst; |
| 1714 } |
| 1715 |
| 1716 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform, |
| 1717 LogicVRegister dst, bool dstIsSigned, |
| 1718 const LogicVRegister& src, |
| 1719 bool srcIsSigned) { |
| 1720 bool upperhalf = false; |
| 1721 VectorFormat srcform = kFormatUndefined; |
| 1722 int64_t ssrc[8]; |
| 1723 uint64_t usrc[8]; |
| 1724 |
| 1725 switch (dstform) { |
| 1726 case kFormat8B: |
| 1727 upperhalf = false; |
| 1728 srcform = kFormat8H; |
| 1729 break; |
| 1730 case kFormat16B: |
| 1731 upperhalf = true; |
| 1732 srcform = kFormat8H; |
| 1733 break; |
| 1734 case kFormat4H: |
| 1735 upperhalf = false; |
| 1736 srcform = kFormat4S; |
| 1737 break; |
| 1738 case kFormat8H: |
| 1739 upperhalf = true; |
| 1740 srcform = kFormat4S; |
| 1741 break; |
| 1742 case kFormat2S: |
| 1743 upperhalf = false; |
| 1744 srcform = kFormat2D; |
| 1745 break; |
| 1746 case kFormat4S: |
| 1747 upperhalf = true; |
| 1748 srcform = kFormat2D; |
| 1749 break; |
| 1750 case kFormatB: |
| 1751 upperhalf = false; |
| 1752 srcform = kFormatH; |
| 1753 break; |
| 1754 case kFormatH: |
| 1755 upperhalf = false; |
| 1756 srcform = kFormatS; |
| 1757 break; |
| 1758 case kFormatS: |
| 1759 upperhalf = false; |
| 1760 srcform = kFormatD; |
| 1761 break; |
| 1762 default: |
| 1763 UNIMPLEMENTED(); |
| 1764 } |
| 1765 |
| 1766 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { |
| 1767 ssrc[i] = src.Int(srcform, i); |
| 1768 usrc[i] = src.Uint(srcform, i); |
| 1769 } |
| 1770 |
| 1771 int offset; |
| 1772 if (upperhalf) { |
| 1773 offset = LaneCountFromFormat(dstform) / 2; |
| 1774 } else { |
| 1775 offset = 0; |
| 1776 dst.ClearForWrite(dstform); |
| 1777 } |
| 1778 |
| 1779 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { |
| 1780 // Test for signed saturation |
| 1781 if (ssrc[i] > MaxIntFromFormat(dstform)) { |
| 1782 dst.SetSignedSat(offset + i, true); |
| 1783 } else if (ssrc[i] < MinIntFromFormat(dstform)) { |
| 1784 dst.SetSignedSat(offset + i, false); |
| 1785 } |
| 1786 |
| 1787 // Test for unsigned saturation |
| 1788 if (srcIsSigned) { |
| 1789 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { |
| 1790 dst.SetUnsignedSat(offset + i, true); |
| 1791 } else if (ssrc[i] < 0) { |
| 1792 dst.SetUnsignedSat(offset + i, false); |
| 1793 } |
| 1794 } else { |
| 1795 if (usrc[i] > MaxUintFromFormat(dstform)) { |
| 1796 dst.SetUnsignedSat(offset + i, true); |
| 1797 } |
| 1798 } |
| 1799 |
| 1800 int64_t result; |
| 1801 if (srcIsSigned) { |
| 1802 result = ssrc[i] & MaxUintFromFormat(dstform); |
| 1803 } else { |
| 1804 result = usrc[i] & MaxUintFromFormat(dstform); |
| 1805 } |
| 1806 |
| 1807 if (dstIsSigned) { |
| 1808 dst.SetInt(dstform, offset + i, result); |
| 1809 } else { |
| 1810 dst.SetUint(dstform, offset + i, result); |
| 1811 } |
| 1812 } |
| 1813 return dst; |
| 1814 } |
| 1815 |
| 1816 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst, |
| 1817 const LogicVRegister& src) { |
| 1818 return ExtractNarrow(vform, dst, true, src, true); |
| 1819 } |
| 1820 |
| 1821 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst, |
| 1822 const LogicVRegister& src) { |
| 1823 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform); |
| 1824 } |
| 1825 |
| 1826 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst, |
| 1827 const LogicVRegister& src) { |
| 1828 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform); |
| 1829 } |
| 1830 |
| 1831 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst, |
| 1832 const LogicVRegister& src) { |
| 1833 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform); |
| 1834 } |
| 1835 |
| 1836 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst, |
| 1837 const LogicVRegister& src1, |
| 1838 const LogicVRegister& src2, bool issigned) { |
| 1839 dst.ClearForWrite(vform); |
| 1840 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1841 if (issigned) { |
| 1842 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); |
| 1843 sr = sr > 0 ? sr : -sr; |
| 1844 dst.SetInt(vform, i, sr); |
| 1845 } else { |
| 1846 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); |
| 1847 sr = sr > 0 ? sr : -sr; |
| 1848 dst.SetUint(vform, i, sr); |
| 1849 } |
| 1850 } |
| 1851 return dst; |
| 1852 } |
| 1853 |
| 1854 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst, |
| 1855 const LogicVRegister& src1, |
| 1856 const LogicVRegister& src2) { |
| 1857 SimVRegister temp; |
| 1858 dst.ClearForWrite(vform); |
| 1859 AbsDiff(vform, temp, src1, src2, true); |
| 1860 add(vform, dst, dst, temp); |
| 1861 return dst; |
| 1862 } |
| 1863 |
| 1864 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst, |
| 1865 const LogicVRegister& src1, |
| 1866 const LogicVRegister& src2) { |
| 1867 SimVRegister temp; |
| 1868 dst.ClearForWrite(vform); |
| 1869 AbsDiff(vform, temp, src1, src2, false); |
| 1870 add(vform, dst, dst, temp); |
| 1871 return dst; |
| 1872 } |
| 1873 |
| 1874 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst, |
| 1875 const LogicVRegister& src) { |
| 1876 dst.ClearForWrite(vform); |
| 1877 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 1878 dst.SetUint(vform, i, ~src.Uint(vform, i)); |
| 1879 } |
| 1880 return dst; |
| 1881 } |
| 1882 |
| 1883 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, |
| 1884 const LogicVRegister& src) { |
| 1885 uint64_t result[16]; |
| 1886 int laneCount = LaneCountFromFormat(vform); |
| 1887 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); |
| 1888 uint64_t reversed_value; |
| 1889 uint64_t value; |
| 1890 for (int i = 0; i < laneCount; i++) { |
| 1891 value = src.Uint(vform, i); |
| 1892 reversed_value = 0; |
| 1893 for (int j = 0; j < laneSizeInBits; j++) { |
| 1894 reversed_value = (reversed_value << 1) | (value & 1); |
| 1895 value >>= 1; |
| 1896 } |
| 1897 result[i] = reversed_value; |
| 1898 } |
| 1899 |
| 1900 dst.SetUintArray(vform, result); |
| 1901 return dst; |
| 1902 } |
| 1903 |
| 1904 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, |
| 1905 const LogicVRegister& src, int revSize) { |
| 1906 uint64_t result[16]; |
| 1907 int laneCount = LaneCountFromFormat(vform); |
| 1908 int laneSize = LaneSizeInBytesFromFormat(vform); |
| 1909 int lanesPerLoop = revSize / laneSize; |
| 1910 for (int i = 0; i < laneCount; i += lanesPerLoop) { |
| 1911 for (int j = 0; j < lanesPerLoop; j++) { |
| 1912 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); |
| 1913 } |
| 1914 } |
| 1915 dst.SetUintArray(vform, result); |
| 1916 return dst; |
| 1917 } |
| 1918 |
| 1919 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, |
| 1920 const LogicVRegister& src) { |
| 1921 return rev(vform, dst, src, 2); |
| 1922 } |
| 1923 |
| 1924 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, |
| 1925 const LogicVRegister& src) { |
| 1926 return rev(vform, dst, src, 4); |
| 1927 } |
| 1928 |
| 1929 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, |
| 1930 const LogicVRegister& src) { |
| 1931 return rev(vform, dst, src, 8); |
| 1932 } |
| 1933 |
| 1934 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst, |
| 1935 const LogicVRegister& src, bool is_signed, |
| 1936 bool do_accumulate) { |
| 1937 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); |
| 1938 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U); |
| 1939 DCHECK_LE(LaneCountFromFormat(vform), 8); |
| 1940 |
| 1941 uint64_t result[8]; |
| 1942 int lane_count = LaneCountFromFormat(vform); |
| 1943 for (int i = 0; i < lane_count; i++) { |
| 1944 if (is_signed) { |
| 1945 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + |
| 1946 src.Int(vformsrc, 2 * i + 1)); |
| 1947 } else { |
| 1948 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); |
| 1949 } |
| 1950 } |
| 1951 |
| 1952 dst.ClearForWrite(vform); |
| 1953 for (int i = 0; i < lane_count; ++i) { |
| 1954 if (do_accumulate) { |
| 1955 result[i] += dst.Uint(vform, i); |
| 1956 } |
| 1957 dst.SetUint(vform, i, result[i]); |
| 1958 } |
| 1959 |
| 1960 return dst; |
| 1961 } |
| 1962 |
| 1963 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst, |
| 1964 const LogicVRegister& src) { |
| 1965 return addlp(vform, dst, src, true, false); |
| 1966 } |
| 1967 |
| 1968 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst, |
| 1969 const LogicVRegister& src) { |
| 1970 return addlp(vform, dst, src, false, false); |
| 1971 } |
| 1972 |
| 1973 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst, |
| 1974 const LogicVRegister& src) { |
| 1975 return addlp(vform, dst, src, true, true); |
| 1976 } |
| 1977 |
| 1978 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst, |
| 1979 const LogicVRegister& src) { |
| 1980 return addlp(vform, dst, src, false, true); |
| 1981 } |
| 1982 |
| 1983 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst, |
| 1984 const LogicVRegister& src1, |
| 1985 const LogicVRegister& src2, int index) { |
| 1986 uint8_t result[16]; |
| 1987 int laneCount = LaneCountFromFormat(vform); |
| 1988 for (int i = 0; i < laneCount - index; ++i) { |
| 1989 result[i] = src1.Uint(vform, i + index); |
| 1990 } |
| 1991 for (int i = 0; i < index; ++i) { |
| 1992 result[laneCount - index + i] = src2.Uint(vform, i); |
| 1993 } |
| 1994 dst.ClearForWrite(vform); |
| 1995 for (int i = 0; i < laneCount; ++i) { |
| 1996 dst.SetUint(vform, i, result[i]); |
| 1997 } |
| 1998 return dst; |
| 1999 } |
| 2000 |
| 2001 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst, |
| 2002 const LogicVRegister& src, |
| 2003 int src_index) { |
| 2004 int laneCount = LaneCountFromFormat(vform); |
| 2005 uint64_t value = src.Uint(vform, src_index); |
| 2006 dst.ClearForWrite(vform); |
| 2007 for (int i = 0; i < laneCount; ++i) { |
| 2008 dst.SetUint(vform, i, value); |
| 2009 } |
| 2010 return dst; |
| 2011 } |
| 2012 |
| 2013 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst, |
| 2014 uint64_t imm) { |
| 2015 int laneCount = LaneCountFromFormat(vform); |
| 2016 uint64_t value = imm & MaxUintFromFormat(vform); |
| 2017 dst.ClearForWrite(vform); |
| 2018 for (int i = 0; i < laneCount; ++i) { |
| 2019 dst.SetUint(vform, i, value); |
| 2020 } |
| 2021 return dst; |
| 2022 } |
| 2023 |
| 2024 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst, |
| 2025 int dst_index, const LogicVRegister& src, |
| 2026 int src_index) { |
| 2027 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); |
| 2028 return dst; |
| 2029 } |
| 2030 |
| 2031 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst, |
| 2032 int dst_index, uint64_t imm) { |
| 2033 uint64_t value = imm & MaxUintFromFormat(vform); |
| 2034 dst.SetUint(vform, dst_index, value); |
| 2035 return dst; |
| 2036 } |
| 2037 |
| 2038 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, |
| 2039 uint64_t imm) { |
| 2040 int laneCount = LaneCountFromFormat(vform); |
| 2041 dst.ClearForWrite(vform); |
| 2042 for (int i = 0; i < laneCount; ++i) { |
| 2043 dst.SetUint(vform, i, imm); |
| 2044 } |
| 2045 return dst; |
| 2046 } |
| 2047 |
| 2048 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, |
| 2049 uint64_t imm) { |
| 2050 int laneCount = LaneCountFromFormat(vform); |
| 2051 dst.ClearForWrite(vform); |
| 2052 for (int i = 0; i < laneCount; ++i) { |
| 2053 dst.SetUint(vform, i, ~imm); |
| 2054 } |
| 2055 return dst; |
| 2056 } |
| 2057 |
| 2058 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, |
| 2059 const LogicVRegister& src, uint64_t imm) { |
| 2060 uint64_t result[16]; |
| 2061 int laneCount = LaneCountFromFormat(vform); |
| 2062 for (int i = 0; i < laneCount; ++i) { |
| 2063 result[i] = src.Uint(vform, i) | imm; |
| 2064 } |
| 2065 dst.SetUintArray(vform, result); |
| 2066 return dst; |
| 2067 } |
| 2068 |
| 2069 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst, |
| 2070 const LogicVRegister& src) { |
| 2071 VectorFormat vform_half = VectorFormatHalfWidth(vform); |
| 2072 |
| 2073 dst.ClearForWrite(vform); |
| 2074 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 2075 dst.SetUint(vform, i, src.Uint(vform_half, i)); |
| 2076 } |
| 2077 return dst; |
| 2078 } |
| 2079 |
| 2080 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst, |
| 2081 const LogicVRegister& src) { |
| 2082 VectorFormat vform_half = VectorFormatHalfWidth(vform); |
| 2083 |
| 2084 dst.ClearForWrite(vform); |
| 2085 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 2086 dst.SetInt(vform, i, src.Int(vform_half, i)); |
| 2087 } |
| 2088 return dst; |
| 2089 } |
| 2090 |
| 2091 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst, |
| 2092 const LogicVRegister& src) { |
| 2093 VectorFormat vform_half = VectorFormatHalfWidth(vform); |
| 2094 int lane_count = LaneCountFromFormat(vform); |
| 2095 |
| 2096 dst.ClearForWrite(vform); |
| 2097 for (int i = 0; i < lane_count; i++) { |
| 2098 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); |
| 2099 } |
| 2100 return dst; |
| 2101 } |
| 2102 |
| 2103 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst, |
| 2104 const LogicVRegister& src) { |
| 2105 VectorFormat vform_half = VectorFormatHalfWidth(vform); |
| 2106 int lane_count = LaneCountFromFormat(vform); |
| 2107 |
| 2108 dst.ClearForWrite(vform); |
| 2109 for (int i = 0; i < lane_count; i++) { |
| 2110 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); |
| 2111 } |
| 2112 return dst; |
| 2113 } |
| 2114 |
| 2115 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, |
| 2116 const LogicVRegister& src, int shift) { |
| 2117 SimVRegister temp; |
| 2118 VectorFormat vform_src = VectorFormatDoubleWidth(vform); |
| 2119 VectorFormat vform_dst = vform; |
| 2120 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); |
| 2121 return ExtractNarrow(vform_dst, dst, false, shifted_src, false); |
| 2122 } |
| 2123 |
| 2124 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst, |
| 2125 const LogicVRegister& src, int shift) { |
| 2126 SimVRegister temp; |
| 2127 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); |
| 2128 VectorFormat vformdst = vform; |
| 2129 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); |
| 2130 return ExtractNarrow(vformdst, dst, false, shifted_src, false); |
| 2131 } |
| 2132 |
| 2133 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst, |
| 2134 const LogicVRegister& src, int shift) { |
| 2135 SimVRegister temp; |
| 2136 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); |
| 2137 VectorFormat vformdst = vform; |
| 2138 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); |
| 2139 return ExtractNarrow(vformdst, dst, false, shifted_src, false); |
| 2140 } |
| 2141 |
| 2142 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst, |
| 2143 const LogicVRegister& src, int shift) { |
| 2144 SimVRegister temp; |
| 2145 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); |
| 2146 VectorFormat vformdst = vform; |
| 2147 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); |
| 2148 return ExtractNarrow(vformdst, dst, false, shifted_src, false); |
| 2149 } |
| 2150 |
| 2151 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, |
| 2152 const LogicVRegister& ind, |
| 2153 bool zero_out_of_bounds, |
| 2154 const LogicVRegister* tab1, |
| 2155 const LogicVRegister* tab2, |
| 2156 const LogicVRegister* tab3, |
| 2157 const LogicVRegister* tab4) { |
| 2158 DCHECK_NOT_NULL(tab1); |
| 2159 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; |
| 2160 uint64_t result[kMaxLanesPerVector]; |
| 2161 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 2162 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i); |
| 2163 } |
| 2164 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 2165 uint64_t j = ind.Uint(vform, i); |
| 2166 int tab_idx = static_cast<int>(j >> 4); |
| 2167 int j_idx = static_cast<int>(j & 15); |
| 2168 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { |
| 2169 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); |
| 2170 } |
| 2171 } |
| 2172 dst.SetUintArray(vform, result); |
| 2173 return dst; |
| 2174 } |
| 2175 |
| 2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, |
| 2177 const LogicVRegister& tab, |
| 2178 const LogicVRegister& ind) { |
| 2179 return Table(vform, dst, ind, true, &tab); |
| 2180 } |
| 2181 |
| 2182 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, |
| 2183 const LogicVRegister& tab, |
| 2184 const LogicVRegister& tab2, |
| 2185 const LogicVRegister& ind) { |
| 2186 return Table(vform, dst, ind, true, &tab, &tab2); |
| 2187 } |
| 2188 |
| 2189 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, |
| 2190 const LogicVRegister& tab, |
| 2191 const LogicVRegister& tab2, |
| 2192 const LogicVRegister& tab3, |
| 2193 const LogicVRegister& ind) { |
| 2194 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); |
| 2195 } |
| 2196 |
| 2197 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, |
| 2198 const LogicVRegister& tab, |
| 2199 const LogicVRegister& tab2, |
| 2200 const LogicVRegister& tab3, |
| 2201 const LogicVRegister& tab4, |
| 2202 const LogicVRegister& ind) { |
| 2203 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); |
| 2204 } |
| 2205 |
| 2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, |
| 2207 const LogicVRegister& tab, |
| 2208 const LogicVRegister& ind) { |
| 2209 return Table(vform, dst, ind, false, &tab); |
| 2210 } |
| 2211 |
| 2212 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, |
| 2213 const LogicVRegister& tab, |
| 2214 const LogicVRegister& tab2, |
| 2215 const LogicVRegister& ind) { |
| 2216 return Table(vform, dst, ind, false, &tab, &tab2); |
| 2217 } |
| 2218 |
| 2219 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, |
| 2220 const LogicVRegister& tab, |
| 2221 const LogicVRegister& tab2, |
| 2222 const LogicVRegister& tab3, |
| 2223 const LogicVRegister& ind) { |
| 2224 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); |
| 2225 } |
| 2226 |
| 2227 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, |
| 2228 const LogicVRegister& tab, |
| 2229 const LogicVRegister& tab2, |
| 2230 const LogicVRegister& tab3, |
| 2231 const LogicVRegister& tab4, |
| 2232 const LogicVRegister& ind) { |
| 2233 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); |
| 2234 } |
| 2235 |
| 2236 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst, |
| 2237 const LogicVRegister& src, int shift) { |
| 2238 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); |
| 2239 } |
| 2240 |
| 2241 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst, |
| 2242 const LogicVRegister& src, int shift) { |
| 2243 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); |
| 2244 } |
| 2245 |
| 2246 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst, |
| 2247 const LogicVRegister& src, int shift) { |
| 2248 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); |
| 2249 } |
| 2250 |
| 2251 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst, |
| 2252 const LogicVRegister& src, int shift) { |
| 2253 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); |
| 2254 } |
| 2255 |
| 2256 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst, |
| 2257 const LogicVRegister& src, int shift) { |
| 2258 SimVRegister temp; |
| 2259 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); |
| 2260 VectorFormat vformdst = vform; |
| 2261 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); |
| 2262 return sqxtn(vformdst, dst, shifted_src); |
| 2263 } |
| 2264 |
| 2265 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst, |
| 2266 const LogicVRegister& src, int shift) { |
| 2267 SimVRegister temp; |
| 2268 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); |
| 2269 VectorFormat vformdst = vform; |
| 2270 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); |
| 2271 return sqxtn(vformdst, dst, shifted_src); |
| 2272 } |
| 2273 |
| 2274 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst, |
| 2275 const LogicVRegister& src, int shift) { |
| 2276 SimVRegister temp; |
| 2277 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); |
| 2278 VectorFormat vformdst = vform; |
| 2279 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); |
| 2280 return sqxtn(vformdst, dst, shifted_src); |
| 2281 } |
| 2282 |
| 2283 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst, |
| 2284 const LogicVRegister& src, int shift) { |
| 2285 SimVRegister temp; |
| 2286 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); |
| 2287 VectorFormat vformdst = vform; |
| 2288 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); |
| 2289 return sqxtn(vformdst, dst, shifted_src); |
| 2290 } |
| 2291 |
| 2292 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst, |
| 2293 const LogicVRegister& src, int shift) { |
| 2294 SimVRegister temp; |
| 2295 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); |
| 2296 VectorFormat vformdst = vform; |
| 2297 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); |
| 2298 return sqxtun(vformdst, dst, shifted_src); |
| 2299 } |
| 2300 |
| 2301 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst, |
| 2302 const LogicVRegister& src, int shift) { |
| 2303 SimVRegister temp; |
| 2304 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); |
| 2305 VectorFormat vformdst = vform; |
| 2306 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); |
| 2307 return sqxtun(vformdst, dst, shifted_src); |
| 2308 } |
| 2309 |
| 2310 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst, |
| 2311 const LogicVRegister& src, int shift) { |
| 2312 SimVRegister temp; |
| 2313 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); |
| 2314 VectorFormat vformdst = vform; |
| 2315 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); |
| 2316 return sqxtun(vformdst, dst, shifted_src); |
| 2317 } |
| 2318 |
| 2319 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst, |
| 2320 const LogicVRegister& src, int shift) { |
| 2321 SimVRegister temp; |
| 2322 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); |
| 2323 VectorFormat vformdst = vform; |
| 2324 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); |
| 2325 return sqxtun(vformdst, dst, shifted_src); |
| 2326 } |
| 2327 |
| 2328 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst, |
| 2329 const LogicVRegister& src1, |
| 2330 const LogicVRegister& src2) { |
| 2331 SimVRegister temp1, temp2; |
| 2332 uxtl(vform, temp1, src1); |
| 2333 uxtl(vform, temp2, src2); |
| 2334 add(vform, dst, temp1, temp2); |
| 2335 return dst; |
| 2336 } |
| 2337 |
| 2338 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst, |
| 2339 const LogicVRegister& src1, |
| 2340 const LogicVRegister& src2) { |
| 2341 SimVRegister temp1, temp2; |
| 2342 uxtl2(vform, temp1, src1); |
| 2343 uxtl2(vform, temp2, src2); |
| 2344 add(vform, dst, temp1, temp2); |
| 2345 return dst; |
| 2346 } |
| 2347 |
| 2348 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst, |
| 2349 const LogicVRegister& src1, |
| 2350 const LogicVRegister& src2) { |
| 2351 SimVRegister temp; |
| 2352 uxtl(vform, temp, src2); |
| 2353 add(vform, dst, src1, temp); |
| 2354 return dst; |
| 2355 } |
| 2356 |
| 2357 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst, |
| 2358 const LogicVRegister& src1, |
| 2359 const LogicVRegister& src2) { |
| 2360 SimVRegister temp; |
| 2361 uxtl2(vform, temp, src2); |
| 2362 add(vform, dst, src1, temp); |
| 2363 return dst; |
| 2364 } |
| 2365 |
| 2366 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst, |
| 2367 const LogicVRegister& src1, |
| 2368 const LogicVRegister& src2) { |
| 2369 SimVRegister temp1, temp2; |
| 2370 sxtl(vform, temp1, src1); |
| 2371 sxtl(vform, temp2, src2); |
| 2372 add(vform, dst, temp1, temp2); |
| 2373 return dst; |
| 2374 } |
| 2375 |
| 2376 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst, |
| 2377 const LogicVRegister& src1, |
| 2378 const LogicVRegister& src2) { |
| 2379 SimVRegister temp1, temp2; |
| 2380 sxtl2(vform, temp1, src1); |
| 2381 sxtl2(vform, temp2, src2); |
| 2382 add(vform, dst, temp1, temp2); |
| 2383 return dst; |
| 2384 } |
| 2385 |
| 2386 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst, |
| 2387 const LogicVRegister& src1, |
| 2388 const LogicVRegister& src2) { |
| 2389 SimVRegister temp; |
| 2390 sxtl(vform, temp, src2); |
| 2391 add(vform, dst, src1, temp); |
| 2392 return dst; |
| 2393 } |
| 2394 |
| 2395 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst, |
| 2396 const LogicVRegister& src1, |
| 2397 const LogicVRegister& src2) { |
| 2398 SimVRegister temp; |
| 2399 sxtl2(vform, temp, src2); |
| 2400 add(vform, dst, src1, temp); |
| 2401 return dst; |
| 2402 } |
| 2403 |
| 2404 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst, |
| 2405 const LogicVRegister& src1, |
| 2406 const LogicVRegister& src2) { |
| 2407 SimVRegister temp1, temp2; |
| 2408 uxtl(vform, temp1, src1); |
| 2409 uxtl(vform, temp2, src2); |
| 2410 sub(vform, dst, temp1, temp2); |
| 2411 return dst; |
| 2412 } |
| 2413 |
| 2414 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst, |
| 2415 const LogicVRegister& src1, |
| 2416 const LogicVRegister& src2) { |
| 2417 SimVRegister temp1, temp2; |
| 2418 uxtl2(vform, temp1, src1); |
| 2419 uxtl2(vform, temp2, src2); |
| 2420 sub(vform, dst, temp1, temp2); |
| 2421 return dst; |
| 2422 } |
| 2423 |
| 2424 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst, |
| 2425 const LogicVRegister& src1, |
| 2426 const LogicVRegister& src2) { |
| 2427 SimVRegister temp; |
| 2428 uxtl(vform, temp, src2); |
| 2429 sub(vform, dst, src1, temp); |
| 2430 return dst; |
| 2431 } |
| 2432 |
| 2433 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst, |
| 2434 const LogicVRegister& src1, |
| 2435 const LogicVRegister& src2) { |
| 2436 SimVRegister temp; |
| 2437 uxtl2(vform, temp, src2); |
| 2438 sub(vform, dst, src1, temp); |
| 2439 return dst; |
| 2440 } |
| 2441 |
| 2442 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst, |
| 2443 const LogicVRegister& src1, |
| 2444 const LogicVRegister& src2) { |
| 2445 SimVRegister temp1, temp2; |
| 2446 sxtl(vform, temp1, src1); |
| 2447 sxtl(vform, temp2, src2); |
| 2448 sub(vform, dst, temp1, temp2); |
| 2449 return dst; |
| 2450 } |
| 2451 |
| 2452 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst, |
| 2453 const LogicVRegister& src1, |
| 2454 const LogicVRegister& src2) { |
| 2455 SimVRegister temp1, temp2; |
| 2456 sxtl2(vform, temp1, src1); |
| 2457 sxtl2(vform, temp2, src2); |
| 2458 sub(vform, dst, temp1, temp2); |
| 2459 return dst; |
| 2460 } |
| 2461 |
| 2462 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst, |
| 2463 const LogicVRegister& src1, |
| 2464 const LogicVRegister& src2) { |
| 2465 SimVRegister temp; |
| 2466 sxtl(vform, temp, src2); |
| 2467 sub(vform, dst, src1, temp); |
| 2468 return dst; |
| 2469 } |
| 2470 |
| 2471 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst, |
| 2472 const LogicVRegister& src1, |
| 2473 const LogicVRegister& src2) { |
| 2474 SimVRegister temp; |
| 2475 sxtl2(vform, temp, src2); |
| 2476 sub(vform, dst, src1, temp); |
| 2477 return dst; |
| 2478 } |
| 2479 |
| 2480 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst, |
| 2481 const LogicVRegister& src1, |
| 2482 const LogicVRegister& src2) { |
| 2483 SimVRegister temp1, temp2; |
| 2484 uxtl(vform, temp1, src1); |
| 2485 uxtl(vform, temp2, src2); |
| 2486 uaba(vform, dst, temp1, temp2); |
| 2487 return dst; |
| 2488 } |
| 2489 |
| 2490 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst, |
| 2491 const LogicVRegister& src1, |
| 2492 const LogicVRegister& src2) { |
| 2493 SimVRegister temp1, temp2; |
| 2494 uxtl2(vform, temp1, src1); |
| 2495 uxtl2(vform, temp2, src2); |
| 2496 uaba(vform, dst, temp1, temp2); |
| 2497 return dst; |
| 2498 } |
| 2499 |
| 2500 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst, |
| 2501 const LogicVRegister& src1, |
| 2502 const LogicVRegister& src2) { |
| 2503 SimVRegister temp1, temp2; |
| 2504 sxtl(vform, temp1, src1); |
| 2505 sxtl(vform, temp2, src2); |
| 2506 saba(vform, dst, temp1, temp2); |
| 2507 return dst; |
| 2508 } |
| 2509 |
| 2510 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst, |
| 2511 const LogicVRegister& src1, |
| 2512 const LogicVRegister& src2) { |
| 2513 SimVRegister temp1, temp2; |
| 2514 sxtl2(vform, temp1, src1); |
| 2515 sxtl2(vform, temp2, src2); |
| 2516 saba(vform, dst, temp1, temp2); |
| 2517 return dst; |
| 2518 } |
| 2519 |
| 2520 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst, |
| 2521 const LogicVRegister& src1, |
| 2522 const LogicVRegister& src2) { |
| 2523 SimVRegister temp1, temp2; |
| 2524 uxtl(vform, temp1, src1); |
| 2525 uxtl(vform, temp2, src2); |
| 2526 AbsDiff(vform, dst, temp1, temp2, false); |
| 2527 return dst; |
| 2528 } |
| 2529 |
| 2530 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst, |
| 2531 const LogicVRegister& src1, |
| 2532 const LogicVRegister& src2) { |
| 2533 SimVRegister temp1, temp2; |
| 2534 uxtl2(vform, temp1, src1); |
| 2535 uxtl2(vform, temp2, src2); |
| 2536 AbsDiff(vform, dst, temp1, temp2, false); |
| 2537 return dst; |
| 2538 } |
| 2539 |
| 2540 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst, |
| 2541 const LogicVRegister& src1, |
| 2542 const LogicVRegister& src2) { |
| 2543 SimVRegister temp1, temp2; |
| 2544 sxtl(vform, temp1, src1); |
| 2545 sxtl(vform, temp2, src2); |
| 2546 AbsDiff(vform, dst, temp1, temp2, true); |
| 2547 return dst; |
| 2548 } |
| 2549 |
| 2550 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst, |
| 2551 const LogicVRegister& src1, |
| 2552 const LogicVRegister& src2) { |
| 2553 SimVRegister temp1, temp2; |
| 2554 sxtl2(vform, temp1, src1); |
| 2555 sxtl2(vform, temp2, src2); |
| 2556 AbsDiff(vform, dst, temp1, temp2, true); |
| 2557 return dst; |
| 2558 } |
| 2559 |
| 2560 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst, |
| 2561 const LogicVRegister& src1, |
| 2562 const LogicVRegister& src2) { |
| 2563 SimVRegister temp1, temp2; |
| 2564 uxtl(vform, temp1, src1); |
| 2565 uxtl(vform, temp2, src2); |
| 2566 mul(vform, dst, temp1, temp2); |
| 2567 return dst; |
| 2568 } |
| 2569 |
| 2570 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst, |
| 2571 const LogicVRegister& src1, |
| 2572 const LogicVRegister& src2) { |
| 2573 SimVRegister temp1, temp2; |
| 2574 uxtl2(vform, temp1, src1); |
| 2575 uxtl2(vform, temp2, src2); |
| 2576 mul(vform, dst, temp1, temp2); |
| 2577 return dst; |
| 2578 } |
| 2579 |
| 2580 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst, |
| 2581 const LogicVRegister& src1, |
| 2582 const LogicVRegister& src2) { |
| 2583 SimVRegister temp1, temp2; |
| 2584 sxtl(vform, temp1, src1); |
| 2585 sxtl(vform, temp2, src2); |
| 2586 mul(vform, dst, temp1, temp2); |
| 2587 return dst; |
| 2588 } |
| 2589 |
| 2590 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst, |
| 2591 const LogicVRegister& src1, |
| 2592 const LogicVRegister& src2) { |
| 2593 SimVRegister temp1, temp2; |
| 2594 sxtl2(vform, temp1, src1); |
| 2595 sxtl2(vform, temp2, src2); |
| 2596 mul(vform, dst, temp1, temp2); |
| 2597 return dst; |
| 2598 } |
| 2599 |
| 2600 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst, |
| 2601 const LogicVRegister& src1, |
| 2602 const LogicVRegister& src2) { |
| 2603 SimVRegister temp1, temp2; |
| 2604 uxtl(vform, temp1, src1); |
| 2605 uxtl(vform, temp2, src2); |
| 2606 mls(vform, dst, temp1, temp2); |
| 2607 return dst; |
| 2608 } |
| 2609 |
| 2610 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst, |
| 2611 const LogicVRegister& src1, |
| 2612 const LogicVRegister& src2) { |
| 2613 SimVRegister temp1, temp2; |
| 2614 uxtl2(vform, temp1, src1); |
| 2615 uxtl2(vform, temp2, src2); |
| 2616 mls(vform, dst, temp1, temp2); |
| 2617 return dst; |
| 2618 } |
| 2619 |
| 2620 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, |
| 2621 const LogicVRegister& src1, |
| 2622 const LogicVRegister& src2) { |
| 2623 SimVRegister temp1, temp2; |
| 2624 sxtl(vform, temp1, src1); |
| 2625 sxtl(vform, temp2, src2); |
| 2626 mls(vform, dst, temp1, temp2); |
| 2627 return dst; |
| 2628 } |
| 2629 |
| 2630 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, |
| 2631 const LogicVRegister& src1, |
| 2632 const LogicVRegister& src2) { |
| 2633 SimVRegister temp1, temp2; |
| 2634 sxtl2(vform, temp1, src1); |
| 2635 sxtl2(vform, temp2, src2); |
| 2636 mls(vform, dst, temp1, temp2); |
| 2637 return dst; |
| 2638 } |
| 2639 |
| 2640 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, |
| 2641 const LogicVRegister& src1, |
| 2642 const LogicVRegister& src2) { |
| 2643 SimVRegister temp1, temp2; |
| 2644 uxtl(vform, temp1, src1); |
| 2645 uxtl(vform, temp2, src2); |
| 2646 mla(vform, dst, temp1, temp2); |
| 2647 return dst; |
| 2648 } |
| 2649 |
| 2650 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, |
| 2651 const LogicVRegister& src1, |
| 2652 const LogicVRegister& src2) { |
| 2653 SimVRegister temp1, temp2; |
| 2654 uxtl2(vform, temp1, src1); |
| 2655 uxtl2(vform, temp2, src2); |
| 2656 mla(vform, dst, temp1, temp2); |
| 2657 return dst; |
| 2658 } |
| 2659 |
| 2660 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, |
| 2661 const LogicVRegister& src1, |
| 2662 const LogicVRegister& src2) { |
| 2663 SimVRegister temp1, temp2; |
| 2664 sxtl(vform, temp1, src1); |
| 2665 sxtl(vform, temp2, src2); |
| 2666 mla(vform, dst, temp1, temp2); |
| 2667 return dst; |
| 2668 } |
| 2669 |
| 2670 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, |
| 2671 const LogicVRegister& src1, |
| 2672 const LogicVRegister& src2) { |
| 2673 SimVRegister temp1, temp2; |
| 2674 sxtl2(vform, temp1, src1); |
| 2675 sxtl2(vform, temp2, src2); |
| 2676 mla(vform, dst, temp1, temp2); |
| 2677 return dst; |
| 2678 } |
| 2679 |
| 2680 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, |
| 2681 const LogicVRegister& src1, |
| 2682 const LogicVRegister& src2) { |
| 2683 SimVRegister temp; |
| 2684 LogicVRegister product = sqdmull(vform, temp, src1, src2); |
| 2685 return add(vform, dst, dst, product).SignedSaturate(vform); |
| 2686 } |
| 2687 |
| 2688 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst, |
| 2689 const LogicVRegister& src1, |
| 2690 const LogicVRegister& src2) { |
| 2691 SimVRegister temp; |
| 2692 LogicVRegister product = sqdmull2(vform, temp, src1, src2); |
| 2693 return add(vform, dst, dst, product).SignedSaturate(vform); |
| 2694 } |
| 2695 |
| 2696 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst, |
| 2697 const LogicVRegister& src1, |
| 2698 const LogicVRegister& src2) { |
| 2699 SimVRegister temp; |
| 2700 LogicVRegister product = sqdmull(vform, temp, src1, src2); |
| 2701 return sub(vform, dst, dst, product).SignedSaturate(vform); |
| 2702 } |
| 2703 |
| 2704 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst, |
| 2705 const LogicVRegister& src1, |
| 2706 const LogicVRegister& src2) { |
| 2707 SimVRegister temp; |
| 2708 LogicVRegister product = sqdmull2(vform, temp, src1, src2); |
| 2709 return sub(vform, dst, dst, product).SignedSaturate(vform); |
| 2710 } |
| 2711 |
| 2712 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst, |
| 2713 const LogicVRegister& src1, |
| 2714 const LogicVRegister& src2) { |
| 2715 SimVRegister temp; |
| 2716 LogicVRegister product = smull(vform, temp, src1, src2); |
| 2717 return add(vform, dst, product, product).SignedSaturate(vform); |
| 2718 } |
| 2719 |
| 2720 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst, |
| 2721 const LogicVRegister& src1, |
| 2722 const LogicVRegister& src2) { |
| 2723 SimVRegister temp; |
| 2724 LogicVRegister product = smull2(vform, temp, src1, src2); |
| 2725 return add(vform, dst, product, product).SignedSaturate(vform); |
| 2726 } |
| 2727 |
| 2728 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst, |
| 2729 const LogicVRegister& src1, |
| 2730 const LogicVRegister& src2, bool round) { |
| 2731 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. |
| 2732 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) |
| 2733 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. |
| 2734 |
| 2735 int esize = LaneSizeInBitsFromFormat(vform); |
| 2736 int round_const = round ? (1 << (esize - 2)) : 0; |
| 2737 int64_t product; |
| 2738 |
| 2739 dst.ClearForWrite(vform); |
| 2740 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 2741 product = src1.Int(vform, i) * src2.Int(vform, i); |
| 2742 product += round_const; |
| 2743 product = product >> (esize - 1); |
| 2744 |
| 2745 if (product > MaxIntFromFormat(vform)) { |
| 2746 product = MaxIntFromFormat(vform); |
| 2747 } else if (product < MinIntFromFormat(vform)) { |
| 2748 product = MinIntFromFormat(vform); |
| 2749 } |
| 2750 dst.SetInt(vform, i, product); |
| 2751 } |
| 2752 return dst; |
| 2753 } |
| 2754 |
| 2755 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, |
| 2756 const LogicVRegister& src1, |
| 2757 const LogicVRegister& src2) { |
| 2758 return sqrdmulh(vform, dst, src1, src2, false); |
| 2759 } |
| 2760 |
| 2761 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst, |
| 2762 const LogicVRegister& src1, |
| 2763 const LogicVRegister& src2) { |
| 2764 SimVRegister temp; |
| 2765 add(VectorFormatDoubleWidth(vform), temp, src1, src2); |
| 2766 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2767 return dst; |
| 2768 } |
| 2769 |
| 2770 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst, |
| 2771 const LogicVRegister& src1, |
| 2772 const LogicVRegister& src2) { |
| 2773 SimVRegister temp; |
| 2774 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); |
| 2775 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2776 return dst; |
| 2777 } |
| 2778 |
| 2779 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst, |
| 2780 const LogicVRegister& src1, |
| 2781 const LogicVRegister& src2) { |
| 2782 SimVRegister temp; |
| 2783 add(VectorFormatDoubleWidth(vform), temp, src1, src2); |
| 2784 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2785 return dst; |
| 2786 } |
| 2787 |
| 2788 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst, |
| 2789 const LogicVRegister& src1, |
| 2790 const LogicVRegister& src2) { |
| 2791 SimVRegister temp; |
| 2792 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); |
| 2793 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2794 return dst; |
| 2795 } |
| 2796 |
| 2797 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst, |
| 2798 const LogicVRegister& src1, |
| 2799 const LogicVRegister& src2) { |
| 2800 SimVRegister temp; |
| 2801 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); |
| 2802 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2803 return dst; |
| 2804 } |
| 2805 |
| 2806 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst, |
| 2807 const LogicVRegister& src1, |
| 2808 const LogicVRegister& src2) { |
| 2809 SimVRegister temp; |
| 2810 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); |
| 2811 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2812 return dst; |
| 2813 } |
| 2814 |
| 2815 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst, |
| 2816 const LogicVRegister& src1, |
| 2817 const LogicVRegister& src2) { |
| 2818 SimVRegister temp; |
| 2819 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); |
| 2820 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2821 return dst; |
| 2822 } |
| 2823 |
| 2824 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst, |
| 2825 const LogicVRegister& src1, |
| 2826 const LogicVRegister& src2) { |
| 2827 SimVRegister temp; |
| 2828 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); |
| 2829 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); |
| 2830 return dst; |
| 2831 } |
| 2832 |
| 2833 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst, |
| 2834 const LogicVRegister& src1, |
| 2835 const LogicVRegister& src2) { |
| 2836 uint64_t result[16]; |
| 2837 int laneCount = LaneCountFromFormat(vform); |
| 2838 int pairs = laneCount / 2; |
| 2839 for (int i = 0; i < pairs; ++i) { |
| 2840 result[2 * i] = src1.Uint(vform, 2 * i); |
| 2841 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); |
| 2842 } |
| 2843 |
| 2844 dst.SetUintArray(vform, result); |
| 2845 return dst; |
| 2846 } |
| 2847 |
| 2848 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, |
| 2849 const LogicVRegister& src1, |
| 2850 const LogicVRegister& src2) { |
| 2851 uint64_t result[16]; |
| 2852 int laneCount = LaneCountFromFormat(vform); |
| 2853 int pairs = laneCount / 2; |
| 2854 for (int i = 0; i < pairs; ++i) { |
| 2855 result[2 * i] = src1.Uint(vform, (2 * i) + 1); |
| 2856 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); |
| 2857 } |
| 2858 |
| 2859 dst.SetUintArray(vform, result); |
| 2860 return dst; |
| 2861 } |
| 2862 |
| 2863 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, |
| 2864 const LogicVRegister& src1, |
| 2865 const LogicVRegister& src2) { |
| 2866 uint64_t result[16]; |
| 2867 int laneCount = LaneCountFromFormat(vform); |
| 2868 int pairs = laneCount / 2; |
| 2869 for (int i = 0; i < pairs; ++i) { |
| 2870 result[2 * i] = src1.Uint(vform, i); |
| 2871 result[(2 * i) + 1] = src2.Uint(vform, i); |
| 2872 } |
| 2873 |
| 2874 dst.SetUintArray(vform, result); |
| 2875 return dst; |
| 2876 } |
| 2877 |
| 2878 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, |
| 2879 const LogicVRegister& src1, |
| 2880 const LogicVRegister& src2) { |
| 2881 uint64_t result[16]; |
| 2882 int laneCount = LaneCountFromFormat(vform); |
| 2883 int pairs = laneCount / 2; |
| 2884 for (int i = 0; i < pairs; ++i) { |
| 2885 result[2 * i] = src1.Uint(vform, pairs + i); |
| 2886 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); |
| 2887 } |
| 2888 |
| 2889 dst.SetUintArray(vform, result); |
| 2890 return dst; |
| 2891 } |
| 2892 |
| 2893 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, |
| 2894 const LogicVRegister& src1, |
| 2895 const LogicVRegister& src2) { |
| 2896 uint64_t result[32]; |
| 2897 int laneCount = LaneCountFromFormat(vform); |
| 2898 for (int i = 0; i < laneCount; ++i) { |
| 2899 result[i] = src1.Uint(vform, i); |
| 2900 result[laneCount + i] = src2.Uint(vform, i); |
| 2901 } |
| 2902 |
| 2903 dst.ClearForWrite(vform); |
| 2904 for (int i = 0; i < laneCount; ++i) { |
| 2905 dst.SetUint(vform, i, result[2 * i]); |
| 2906 } |
| 2907 return dst; |
| 2908 } |
| 2909 |
| 2910 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, |
| 2911 const LogicVRegister& src1, |
| 2912 const LogicVRegister& src2) { |
| 2913 uint64_t result[32]; |
| 2914 int laneCount = LaneCountFromFormat(vform); |
| 2915 for (int i = 0; i < laneCount; ++i) { |
| 2916 result[i] = src1.Uint(vform, i); |
| 2917 result[laneCount + i] = src2.Uint(vform, i); |
| 2918 } |
| 2919 |
| 2920 dst.ClearForWrite(vform); |
| 2921 for (int i = 0; i < laneCount; ++i) { |
| 2922 dst.SetUint(vform, i, result[(2 * i) + 1]); |
| 2923 } |
| 2924 return dst; |
| 2925 } |
| 2926 |
| 2927 template <typename T> |
| 2928 T Simulator::FPAdd(T op1, T op2) { |
| 2929 T result = FPProcessNaNs(op1, op2); |
| 2930 if (std::isnan(result)) return result; |
| 2931 |
| 2932 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { |
| 2933 // inf + -inf returns the default NaN. |
| 2934 FPProcessException(); |
| 2935 return FPDefaultNaN<T>(); |
| 2936 } else { |
| 2937 // Other cases should be handled by standard arithmetic. |
| 2938 return op1 + op2; |
| 2939 } |
| 2940 } |
| 2941 |
| 2942 template <typename T> |
| 2943 T Simulator::FPSub(T op1, T op2) { |
| 2944 // NaNs should be handled elsewhere. |
| 2945 DCHECK(!std::isnan(op1) && !std::isnan(op2)); |
| 2946 |
| 2947 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { |
| 2948 // inf - inf returns the default NaN. |
| 2949 FPProcessException(); |
| 2950 return FPDefaultNaN<T>(); |
| 2951 } else { |
| 2952 // Other cases should be handled by standard arithmetic. |
| 2953 return op1 - op2; |
| 2954 } |
| 2955 } |
| 2956 |
| 2957 template <typename T> |
| 2958 T Simulator::FPMul(T op1, T op2) { |
| 2959 // NaNs should be handled elsewhere. |
| 2960 DCHECK(!std::isnan(op1) && !std::isnan(op2)); |
| 2961 |
| 2962 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { |
| 2963 // inf * 0.0 returns the default NaN. |
| 2964 FPProcessException(); |
| 2965 return FPDefaultNaN<T>(); |
| 2966 } else { |
| 2967 // Other cases should be handled by standard arithmetic. |
| 2968 return op1 * op2; |
| 2969 } |
| 2970 } |
| 2971 |
| 2972 template <typename T> |
| 2973 T Simulator::FPMulx(T op1, T op2) { |
| 2974 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { |
| 2975 // inf * 0.0 returns +/-2.0. |
| 2976 T two = 2.0; |
| 2977 return copysign(1.0, op1) * copysign(1.0, op2) * two; |
| 2978 } |
| 2979 return FPMul(op1, op2); |
| 2980 } |
| 2981 |
| 2982 template <typename T> |
| 2983 T Simulator::FPMulAdd(T a, T op1, T op2) { |
| 2984 T result = FPProcessNaNs3(a, op1, op2); |
| 2985 |
| 2986 T sign_a = copysign(1.0, a); |
| 2987 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); |
| 2988 bool isinf_prod = std::isinf(op1) || std::isinf(op2); |
| 2989 bool operation_generates_nan = |
| 2990 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 |
| 2991 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf |
| 2992 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf |
| 2993 |
| 2994 if (std::isnan(result)) { |
| 2995 // Generated NaNs override quiet NaNs propagated from a. |
| 2996 if (operation_generates_nan && IsQuietNaN(a)) { |
| 2997 FPProcessException(); |
| 2998 return FPDefaultNaN<T>(); |
| 2999 } else { |
| 3000 return result; |
| 3001 } |
| 3002 } |
| 3003 |
| 3004 // If the operation would produce a NaN, return the default NaN. |
| 3005 if (operation_generates_nan) { |
| 3006 FPProcessException(); |
| 3007 return FPDefaultNaN<T>(); |
| 3008 } |
| 3009 |
| 3010 // Work around broken fma implementations for exact zero results: The sign of |
| 3011 // exact 0.0 results is positive unless both a and op1 * op2 are negative. |
| 3012 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { |
| 3013 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; |
| 3014 } |
| 3015 |
| 3016 result = FusedMultiplyAdd(op1, op2, a); |
| 3017 DCHECK(!std::isnan(result)); |
| 3018 |
| 3019 // Work around broken fma implementations for rounded zero results: If a is |
| 3020 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. |
| 3021 if ((a == 0.0) && (result == 0.0)) { |
| 3022 return copysign(0.0, sign_prod); |
| 3023 } |
| 3024 |
| 3025 return result; |
| 3026 } |
| 3027 |
| 3028 template <typename T> |
| 3029 T Simulator::FPDiv(T op1, T op2) { |
| 3030 // NaNs should be handled elsewhere. |
| 3031 DCHECK(!std::isnan(op1) && !std::isnan(op2)); |
| 3032 |
| 3033 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { |
| 3034 // inf / inf and 0.0 / 0.0 return the default NaN. |
| 3035 FPProcessException(); |
| 3036 return FPDefaultNaN<T>(); |
| 3037 } else { |
| 3038 if (op2 == 0.0) { |
| 3039 FPProcessException(); |
| 3040 if (!std::isnan(op1)) { |
| 3041 double op1_sign = copysign(1.0, op1); |
| 3042 double op2_sign = copysign(1.0, op2); |
| 3043 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity); |
| 3044 } |
| 3045 } |
| 3046 |
| 3047 // Other cases should be handled by standard arithmetic. |
| 3048 return op1 / op2; |
| 3049 } |
| 3050 } |
| 3051 |
| 3052 template <typename T> |
| 3053 T Simulator::FPSqrt(T op) { |
| 3054 if (std::isnan(op)) { |
| 3055 return FPProcessNaN(op); |
| 3056 } else if (op < 0.0) { |
| 3057 FPProcessException(); |
| 3058 return FPDefaultNaN<T>(); |
| 3059 } else { |
| 3060 return sqrt(op); |
| 3061 } |
| 3062 } |
| 3063 |
| 3064 template <typename T> |
| 3065 T Simulator::FPMax(T a, T b) { |
| 3066 T result = FPProcessNaNs(a, b); |
| 3067 if (std::isnan(result)) return result; |
| 3068 |
| 3069 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { |
| 3070 // a and b are zero, and the sign differs: return +0.0. |
| 3071 return 0.0; |
| 3072 } else { |
| 3073 return (a > b) ? a : b; |
| 3074 } |
| 3075 } |
| 3076 |
| 3077 template <typename T> |
| 3078 T Simulator::FPMaxNM(T a, T b) { |
| 3079 if (IsQuietNaN(a) && !IsQuietNaN(b)) { |
| 3080 a = kFP64NegativeInfinity; |
| 3081 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { |
| 3082 b = kFP64NegativeInfinity; |
| 3083 } |
| 3084 |
| 3085 T result = FPProcessNaNs(a, b); |
| 3086 return std::isnan(result) ? result : FPMax(a, b); |
| 3087 } |
| 3088 |
| 3089 template <typename T> |
| 3090 T Simulator::FPMin(T a, T b) { |
| 3091 T result = FPProcessNaNs(a, b); |
| 3092 if (std::isnan(result)) return result; |
| 3093 |
| 3094 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { |
| 3095 // a and b are zero, and the sign differs: return -0.0. |
| 3096 return -0.0; |
| 3097 } else { |
| 3098 return (a < b) ? a : b; |
| 3099 } |
| 3100 } |
| 3101 |
| 3102 template <typename T> |
| 3103 T Simulator::FPMinNM(T a, T b) { |
| 3104 if (IsQuietNaN(a) && !IsQuietNaN(b)) { |
| 3105 a = kFP64PositiveInfinity; |
| 3106 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { |
| 3107 b = kFP64PositiveInfinity; |
| 3108 } |
| 3109 |
| 3110 T result = FPProcessNaNs(a, b); |
| 3111 return std::isnan(result) ? result : FPMin(a, b); |
| 3112 } |
| 3113 |
| 3114 template <typename T> |
| 3115 T Simulator::FPRecipStepFused(T op1, T op2) { |
| 3116 const T two = 2.0; |
| 3117 if ((std::isinf(op1) && (op2 == 0.0)) || |
| 3118 ((op1 == 0.0) && (std::isinf(op2)))) { |
| 3119 return two; |
| 3120 } else if (std::isinf(op1) || std::isinf(op2)) { |
| 3121 // Return +inf if signs match, otherwise -inf. |
| 3122 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity |
| 3123 : kFP64NegativeInfinity; |
| 3124 } else { |
| 3125 return FusedMultiplyAdd(op1, op2, two); |
| 3126 } |
| 3127 } |
| 3128 |
| 3129 template <typename T> |
| 3130 T Simulator::FPRSqrtStepFused(T op1, T op2) { |
| 3131 const T one_point_five = 1.5; |
| 3132 const T two = 2.0; |
| 3133 |
| 3134 if ((std::isinf(op1) && (op2 == 0.0)) || |
| 3135 ((op1 == 0.0) && (std::isinf(op2)))) { |
| 3136 return one_point_five; |
| 3137 } else if (std::isinf(op1) || std::isinf(op2)) { |
| 3138 // Return +inf if signs match, otherwise -inf. |
| 3139 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity |
| 3140 : kFP64NegativeInfinity; |
| 3141 } else { |
| 3142 // The multiply-add-halve operation must be fully fused, so avoid interim |
| 3143 // rounding by checking which operand can be losslessly divided by two |
| 3144 // before doing the multiply-add. |
| 3145 if (std::isnormal(op1 / two)) { |
| 3146 return FusedMultiplyAdd(op1 / two, op2, one_point_five); |
| 3147 } else if (std::isnormal(op2 / two)) { |
| 3148 return FusedMultiplyAdd(op1, op2 / two, one_point_five); |
| 3149 } else { |
| 3150 // Neither operand is normal after halving: the result is dominated by |
| 3151 // the addition term, so just return that. |
| 3152 return one_point_five; |
| 3153 } |
| 3154 } |
| 3155 } |
| 3156 |
| 3157 double Simulator::FPRoundInt(double value, FPRounding round_mode) { |
| 3158 if ((value == 0.0) || (value == kFP64PositiveInfinity) || |
| 3159 (value == kFP64NegativeInfinity)) { |
| 3160 return value; |
| 3161 } else if (std::isnan(value)) { |
| 3162 return FPProcessNaN(value); |
| 3163 } |
| 3164 |
| 3165 double int_result = std::floor(value); |
| 3166 double error = value - int_result; |
| 3167 switch (round_mode) { |
| 3168 case FPTieAway: { |
| 3169 // Take care of correctly handling the range ]-0.5, -0.0], which must |
| 3170 // yield -0.0. |
| 3171 if ((-0.5 < value) && (value < 0.0)) { |
| 3172 int_result = -0.0; |
| 3173 |
| 3174 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { |
| 3175 // If the error is greater than 0.5, or is equal to 0.5 and the integer |
| 3176 // result is positive, round up. |
| 3177 int_result++; |
| 3178 } |
| 3179 break; |
| 3180 } |
| 3181 case FPTieEven: { |
| 3182 // Take care of correctly handling the range [-0.5, -0.0], which must |
| 3183 // yield -0.0. |
| 3184 if ((-0.5 <= value) && (value < 0.0)) { |
| 3185 int_result = -0.0; |
| 3186 |
| 3187 // If the error is greater than 0.5, or is equal to 0.5 and the integer |
| 3188 // result is odd, round up. |
| 3189 } else if ((error > 0.5) || |
| 3190 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { |
| 3191 int_result++; |
| 3192 } |
| 3193 break; |
| 3194 } |
| 3195 case FPZero: { |
| 3196 // If value>0 then we take floor(value) |
| 3197 // otherwise, ceil(value). |
| 3198 if (value < 0) { |
| 3199 int_result = ceil(value); |
| 3200 } |
| 3201 break; |
| 3202 } |
| 3203 case FPNegativeInfinity: { |
| 3204 // We always use floor(value). |
| 3205 break; |
| 3206 } |
| 3207 case FPPositiveInfinity: { |
| 3208 // Take care of correctly handling the range ]-1.0, -0.0], which must |
| 3209 // yield -0.0. |
| 3210 if ((-1.0 < value) && (value < 0.0)) { |
| 3211 int_result = -0.0; |
| 3212 |
| 3213 // If the error is non-zero, round up. |
| 3214 } else if (error > 0.0) { |
| 3215 int_result++; |
| 3216 } |
| 3217 break; |
| 3218 } |
| 3219 default: |
| 3220 UNIMPLEMENTED(); |
| 3221 } |
| 3222 return int_result; |
| 3223 } |
| 3224 |
| 3225 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { |
| 3226 value = FPRoundInt(value, rmode); |
| 3227 if (value >= kWMaxInt) { |
| 3228 return kWMaxInt; |
| 3229 } else if (value < kWMinInt) { |
| 3230 return kWMinInt; |
| 3231 } |
| 3232 return std::isnan(value) ? 0 : static_cast<int32_t>(value); |
| 3233 } |
| 3234 |
| 3235 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { |
| 3236 value = FPRoundInt(value, rmode); |
| 3237 if (value >= kXMaxInt) { |
| 3238 return kXMaxInt; |
| 3239 } else if (value < kXMinInt) { |
| 3240 return kXMinInt; |
| 3241 } |
| 3242 return std::isnan(value) ? 0 : static_cast<int64_t>(value); |
| 3243 } |
| 3244 |
| 3245 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { |
| 3246 value = FPRoundInt(value, rmode); |
| 3247 if (value >= kWMaxUInt) { |
| 3248 return kWMaxUInt; |
| 3249 } else if (value < 0.0) { |
| 3250 return 0; |
| 3251 } |
| 3252 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); |
| 3253 } |
| 3254 |
| 3255 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { |
| 3256 value = FPRoundInt(value, rmode); |
| 3257 if (value >= kXMaxUInt) { |
| 3258 return kXMaxUInt; |
| 3259 } else if (value < 0.0) { |
| 3260 return 0; |
| 3261 } |
| 3262 return std::isnan(value) ? 0 : static_cast<uint64_t>(value); |
| 3263 } |
| 3264 |
| 3265 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ |
| 3266 template <typename T> \ |
| 3267 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \ |
| 3268 const LogicVRegister& src1, \ |
| 3269 const LogicVRegister& src2) { \ |
| 3270 dst.ClearForWrite(vform); \ |
| 3271 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ |
| 3272 T op1 = src1.Float<T>(i); \ |
| 3273 T op2 = src2.Float<T>(i); \ |
| 3274 T result; \ |
| 3275 if (PROCNAN) { \ |
| 3276 result = FPProcessNaNs(op1, op2); \ |
| 3277 if (!std::isnan(result)) { \ |
| 3278 result = OP(op1, op2); \ |
| 3279 } \ |
| 3280 } else { \ |
| 3281 result = OP(op1, op2); \ |
| 3282 } \ |
| 3283 dst.SetFloat(i, result); \ |
| 3284 } \ |
| 3285 return dst; \ |
| 3286 } \ |
| 3287 \ |
| 3288 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \ |
| 3289 const LogicVRegister& src1, \ |
| 3290 const LogicVRegister& src2) { \ |
| 3291 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \ |
| 3292 FN<float>(vform, dst, src1, src2); \ |
| 3293 } else { \ |
| 3294 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \ |
| 3295 FN<double>(vform, dst, src1, src2); \ |
| 3296 } \ |
| 3297 return dst; \ |
| 3298 } |
| 3299 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) |
| 3300 #undef DEFINE_NEON_FP_VECTOR_OP |
| 3301 |
| 3302 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst, |
| 3303 const LogicVRegister& src1, |
| 3304 const LogicVRegister& src2) { |
| 3305 SimVRegister temp; |
| 3306 LogicVRegister product = fmul(vform, temp, src1, src2); |
| 3307 return fneg(vform, dst, product); |
| 3308 } |
| 3309 |
| 3310 template <typename T> |
| 3311 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, |
| 3312 const LogicVRegister& src1, |
| 3313 const LogicVRegister& src2) { |
| 3314 dst.ClearForWrite(vform); |
| 3315 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3316 T op1 = -src1.Float<T>(i); |
| 3317 T op2 = src2.Float<T>(i); |
| 3318 T result = FPProcessNaNs(op1, op2); |
| 3319 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2)); |
| 3320 } |
| 3321 return dst; |
| 3322 } |
| 3323 |
| 3324 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, |
| 3325 const LogicVRegister& src1, |
| 3326 const LogicVRegister& src2) { |
| 3327 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3328 frecps<float>(vform, dst, src1, src2); |
| 3329 } else { |
| 3330 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3331 frecps<double>(vform, dst, src1, src2); |
| 3332 } |
| 3333 return dst; |
| 3334 } |
| 3335 |
| 3336 template <typename T> |
| 3337 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, |
| 3338 const LogicVRegister& src1, |
| 3339 const LogicVRegister& src2) { |
| 3340 dst.ClearForWrite(vform); |
| 3341 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3342 T op1 = -src1.Float<T>(i); |
| 3343 T op2 = src2.Float<T>(i); |
| 3344 T result = FPProcessNaNs(op1, op2); |
| 3345 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); |
| 3346 } |
| 3347 return dst; |
| 3348 } |
| 3349 |
| 3350 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, |
| 3351 const LogicVRegister& src1, |
| 3352 const LogicVRegister& src2) { |
| 3353 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3354 frsqrts<float>(vform, dst, src1, src2); |
| 3355 } else { |
| 3356 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3357 frsqrts<double>(vform, dst, src1, src2); |
| 3358 } |
| 3359 return dst; |
| 3360 } |
| 3361 |
| 3362 template <typename T> |
| 3363 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, |
| 3364 const LogicVRegister& src1, |
| 3365 const LogicVRegister& src2, Condition cond) { |
| 3366 dst.ClearForWrite(vform); |
| 3367 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3368 bool result = false; |
| 3369 T op1 = src1.Float<T>(i); |
| 3370 T op2 = src2.Float<T>(i); |
| 3371 T nan_result = FPProcessNaNs(op1, op2); |
| 3372 if (!std::isnan(nan_result)) { |
| 3373 switch (cond) { |
| 3374 case eq: |
| 3375 result = (op1 == op2); |
| 3376 break; |
| 3377 case ge: |
| 3378 result = (op1 >= op2); |
| 3379 break; |
| 3380 case gt: |
| 3381 result = (op1 > op2); |
| 3382 break; |
| 3383 case le: |
| 3384 result = (op1 <= op2); |
| 3385 break; |
| 3386 case lt: |
| 3387 result = (op1 < op2); |
| 3388 break; |
| 3389 default: |
| 3390 UNREACHABLE(); |
| 3391 break; |
| 3392 } |
| 3393 } |
| 3394 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); |
| 3395 } |
| 3396 return dst; |
| 3397 } |
| 3398 |
| 3399 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, |
| 3400 const LogicVRegister& src1, |
| 3401 const LogicVRegister& src2, Condition cond) { |
| 3402 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3403 fcmp<float>(vform, dst, src1, src2, cond); |
| 3404 } else { |
| 3405 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3406 fcmp<double>(vform, dst, src1, src2, cond); |
| 3407 } |
| 3408 return dst; |
| 3409 } |
| 3410 |
| 3411 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst, |
| 3412 const LogicVRegister& src, Condition cond) { |
| 3413 SimVRegister temp; |
| 3414 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3415 LogicVRegister zero_reg = |
| 3416 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f)); |
| 3417 fcmp<float>(vform, dst, src, zero_reg, cond); |
| 3418 } else { |
| 3419 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3420 LogicVRegister zero_reg = |
| 3421 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0)); |
| 3422 fcmp<double>(vform, dst, src, zero_reg, cond); |
| 3423 } |
| 3424 return dst; |
| 3425 } |
| 3426 |
| 3427 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst, |
| 3428 const LogicVRegister& src1, |
| 3429 const LogicVRegister& src2, Condition cond) { |
| 3430 SimVRegister temp1, temp2; |
| 3431 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3432 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); |
| 3433 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); |
| 3434 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); |
| 3435 } else { |
| 3436 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3437 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); |
| 3438 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); |
| 3439 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); |
| 3440 } |
| 3441 return dst; |
| 3442 } |
| 3443 |
| 3444 template <typename T> |
| 3445 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, |
| 3446 const LogicVRegister& src1, |
| 3447 const LogicVRegister& src2) { |
| 3448 dst.ClearForWrite(vform); |
| 3449 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3450 T op1 = src1.Float<T>(i); |
| 3451 T op2 = src2.Float<T>(i); |
| 3452 T acc = dst.Float<T>(i); |
| 3453 T result = FPMulAdd(acc, op1, op2); |
| 3454 dst.SetFloat(i, result); |
| 3455 } |
| 3456 return dst; |
| 3457 } |
| 3458 |
| 3459 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, |
| 3460 const LogicVRegister& src1, |
| 3461 const LogicVRegister& src2) { |
| 3462 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3463 fmla<float>(vform, dst, src1, src2); |
| 3464 } else { |
| 3465 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3466 fmla<double>(vform, dst, src1, src2); |
| 3467 } |
| 3468 return dst; |
| 3469 } |
| 3470 |
| 3471 template <typename T> |
| 3472 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, |
| 3473 const LogicVRegister& src1, |
| 3474 const LogicVRegister& src2) { |
| 3475 dst.ClearForWrite(vform); |
| 3476 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3477 T op1 = -src1.Float<T>(i); |
| 3478 T op2 = src2.Float<T>(i); |
| 3479 T acc = dst.Float<T>(i); |
| 3480 T result = FPMulAdd(acc, op1, op2); |
| 3481 dst.SetFloat(i, result); |
| 3482 } |
| 3483 return dst; |
| 3484 } |
| 3485 |
| 3486 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, |
| 3487 const LogicVRegister& src1, |
| 3488 const LogicVRegister& src2) { |
| 3489 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3490 fmls<float>(vform, dst, src1, src2); |
| 3491 } else { |
| 3492 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3493 fmls<double>(vform, dst, src1, src2); |
| 3494 } |
| 3495 return dst; |
| 3496 } |
| 3497 |
| 3498 template <typename T> |
| 3499 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, |
| 3500 const LogicVRegister& src) { |
| 3501 dst.ClearForWrite(vform); |
| 3502 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3503 T op = src.Float<T>(i); |
| 3504 op = -op; |
| 3505 dst.SetFloat(i, op); |
| 3506 } |
| 3507 return dst; |
| 3508 } |
| 3509 |
| 3510 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, |
| 3511 const LogicVRegister& src) { |
| 3512 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3513 fneg<float>(vform, dst, src); |
| 3514 } else { |
| 3515 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3516 fneg<double>(vform, dst, src); |
| 3517 } |
| 3518 return dst; |
| 3519 } |
| 3520 |
| 3521 template <typename T> |
| 3522 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, |
| 3523 const LogicVRegister& src) { |
| 3524 dst.ClearForWrite(vform); |
| 3525 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3526 T op = src.Float<T>(i); |
| 3527 if (copysign(1.0, op) < 0.0) { |
| 3528 op = -op; |
| 3529 } |
| 3530 dst.SetFloat(i, op); |
| 3531 } |
| 3532 return dst; |
| 3533 } |
| 3534 |
| 3535 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, |
| 3536 const LogicVRegister& src) { |
| 3537 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3538 fabs_<float>(vform, dst, src); |
| 3539 } else { |
| 3540 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3541 fabs_<double>(vform, dst, src); |
| 3542 } |
| 3543 return dst; |
| 3544 } |
| 3545 |
| 3546 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst, |
| 3547 const LogicVRegister& src1, |
| 3548 const LogicVRegister& src2) { |
| 3549 SimVRegister temp; |
| 3550 fsub(vform, temp, src1, src2); |
| 3551 fabs_(vform, dst, temp); |
| 3552 return dst; |
| 3553 } |
| 3554 |
| 3555 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst, |
| 3556 const LogicVRegister& src) { |
| 3557 dst.ClearForWrite(vform); |
| 3558 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3559 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3560 float result = FPSqrt(src.Float<float>(i)); |
| 3561 dst.SetFloat(i, result); |
| 3562 } |
| 3563 } else { |
| 3564 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3565 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3566 double result = FPSqrt(src.Float<double>(i)); |
| 3567 dst.SetFloat(i, result); |
| 3568 } |
| 3569 } |
| 3570 return dst; |
| 3571 } |
| 3572 |
| 3573 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ |
| 3574 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \ |
| 3575 const LogicVRegister& src1, \ |
| 3576 const LogicVRegister& src2) { \ |
| 3577 SimVRegister temp1, temp2; \ |
| 3578 uzp1(vform, temp1, src1, src2); \ |
| 3579 uzp2(vform, temp2, src1, src2); \ |
| 3580 FN(vform, dst, temp1, temp2); \ |
| 3581 return dst; \ |
| 3582 } \ |
| 3583 \ |
| 3584 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \ |
| 3585 const LogicVRegister& src) { \ |
| 3586 if (vform == kFormatS) { \ |
| 3587 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ |
| 3588 dst.SetFloat(0, result); \ |
| 3589 } else { \ |
| 3590 DCHECK_EQ(vform, kFormatD); \ |
| 3591 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ |
| 3592 dst.SetFloat(0, result); \ |
| 3593 } \ |
| 3594 dst.ClearForWrite(vform); \ |
| 3595 return dst; \ |
| 3596 } |
| 3597 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) |
| 3598 #undef DEFINE_NEON_FP_PAIR_OP |
| 3599 |
| 3600 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst, |
| 3601 const LogicVRegister& src, FPMinMaxOp Op) { |
| 3602 DCHECK_EQ(vform, kFormat4S); |
| 3603 USE(vform); |
| 3604 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); |
| 3605 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); |
| 3606 float result = (this->*Op)(result1, result2); |
| 3607 dst.ClearForWrite(kFormatS); |
| 3608 dst.SetFloat<float>(0, result); |
| 3609 return dst; |
| 3610 } |
| 3611 |
| 3612 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst, |
| 3613 const LogicVRegister& src) { |
| 3614 return FMinMaxV(vform, dst, src, &Simulator::FPMax); |
| 3615 } |
| 3616 |
| 3617 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst, |
| 3618 const LogicVRegister& src) { |
| 3619 return FMinMaxV(vform, dst, src, &Simulator::FPMin); |
| 3620 } |
| 3621 |
| 3622 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst, |
| 3623 const LogicVRegister& src) { |
| 3624 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM); |
| 3625 } |
| 3626 |
| 3627 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst, |
| 3628 const LogicVRegister& src) { |
| 3629 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM); |
| 3630 } |
| 3631 |
| 3632 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst, |
| 3633 const LogicVRegister& src1, |
| 3634 const LogicVRegister& src2, int index) { |
| 3635 dst.ClearForWrite(vform); |
| 3636 SimVRegister temp; |
| 3637 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3638 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); |
| 3639 fmul<float>(vform, dst, src1, index_reg); |
| 3640 } else { |
| 3641 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3642 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); |
| 3643 fmul<double>(vform, dst, src1, index_reg); |
| 3644 } |
| 3645 return dst; |
| 3646 } |
| 3647 |
| 3648 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, |
| 3649 const LogicVRegister& src1, |
| 3650 const LogicVRegister& src2, int index) { |
| 3651 dst.ClearForWrite(vform); |
| 3652 SimVRegister temp; |
| 3653 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3654 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); |
| 3655 fmla<float>(vform, dst, src1, index_reg); |
| 3656 } else { |
| 3657 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3658 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); |
| 3659 fmla<double>(vform, dst, src1, index_reg); |
| 3660 } |
| 3661 return dst; |
| 3662 } |
| 3663 |
| 3664 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, |
| 3665 const LogicVRegister& src1, |
| 3666 const LogicVRegister& src2, int index) { |
| 3667 dst.ClearForWrite(vform); |
| 3668 SimVRegister temp; |
| 3669 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3670 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); |
| 3671 fmls<float>(vform, dst, src1, index_reg); |
| 3672 } else { |
| 3673 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3674 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); |
| 3675 fmls<double>(vform, dst, src1, index_reg); |
| 3676 } |
| 3677 return dst; |
| 3678 } |
| 3679 |
| 3680 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst, |
| 3681 const LogicVRegister& src1, |
| 3682 const LogicVRegister& src2, int index) { |
| 3683 dst.ClearForWrite(vform); |
| 3684 SimVRegister temp; |
| 3685 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3686 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); |
| 3687 fmulx<float>(vform, dst, src1, index_reg); |
| 3688 |
| 3689 } else { |
| 3690 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3691 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); |
| 3692 fmulx<double>(vform, dst, src1, index_reg); |
| 3693 } |
| 3694 return dst; |
| 3695 } |
| 3696 |
| 3697 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst, |
| 3698 const LogicVRegister& src, |
| 3699 FPRounding rounding_mode, |
| 3700 bool inexact_exception) { |
| 3701 dst.ClearForWrite(vform); |
| 3702 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3703 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3704 float input = src.Float<float>(i); |
| 3705 float rounded = FPRoundInt(input, rounding_mode); |
| 3706 if (inexact_exception && !std::isnan(input) && (input != rounded)) { |
| 3707 FPProcessException(); |
| 3708 } |
| 3709 dst.SetFloat<float>(i, rounded); |
| 3710 } |
| 3711 } else { |
| 3712 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3713 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3714 double input = src.Float<double>(i); |
| 3715 double rounded = FPRoundInt(input, rounding_mode); |
| 3716 if (inexact_exception && !std::isnan(input) && (input != rounded)) { |
| 3717 FPProcessException(); |
| 3718 } |
| 3719 dst.SetFloat<double>(i, rounded); |
| 3720 } |
| 3721 } |
| 3722 return dst; |
| 3723 } |
| 3724 |
| 3725 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst, |
| 3726 const LogicVRegister& src, |
| 3727 FPRounding rounding_mode, int fbits) { |
| 3728 dst.ClearForWrite(vform); |
| 3729 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3730 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3731 float op = src.Float<float>(i) * std::pow(2.0f, fbits); |
| 3732 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); |
| 3733 } |
| 3734 } else { |
| 3735 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3736 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3737 double op = src.Float<double>(i) * std::pow(2.0, fbits); |
| 3738 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); |
| 3739 } |
| 3740 } |
| 3741 return dst; |
| 3742 } |
| 3743 |
| 3744 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst, |
| 3745 const LogicVRegister& src, |
| 3746 FPRounding rounding_mode, int fbits) { |
| 3747 dst.ClearForWrite(vform); |
| 3748 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3749 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3750 float op = src.Float<float>(i) * std::pow(2.0f, fbits); |
| 3751 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); |
| 3752 } |
| 3753 } else { |
| 3754 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3755 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3756 double op = src.Float<double>(i) * std::pow(2.0, fbits); |
| 3757 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); |
| 3758 } |
| 3759 } |
| 3760 return dst; |
| 3761 } |
| 3762 |
| 3763 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, |
| 3764 const LogicVRegister& src) { |
| 3765 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3766 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { |
| 3767 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); |
| 3768 } |
| 3769 } else { |
| 3770 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3771 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { |
| 3772 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); |
| 3773 } |
| 3774 } |
| 3775 return dst; |
| 3776 } |
| 3777 |
| 3778 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst, |
| 3779 const LogicVRegister& src) { |
| 3780 int lane_count = LaneCountFromFormat(vform); |
| 3781 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3782 for (int i = 0; i < lane_count; i++) { |
| 3783 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); |
| 3784 } |
| 3785 } else { |
| 3786 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3787 for (int i = 0; i < lane_count; i++) { |
| 3788 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); |
| 3789 } |
| 3790 } |
| 3791 return dst; |
| 3792 } |
| 3793 |
| 3794 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst, |
| 3795 const LogicVRegister& src) { |
| 3796 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { |
| 3797 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3798 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); |
| 3799 } |
| 3800 } else { |
| 3801 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); |
| 3802 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3803 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); |
| 3804 } |
| 3805 } |
| 3806 return dst; |
| 3807 } |
| 3808 |
| 3809 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst, |
| 3810 const LogicVRegister& src) { |
| 3811 int lane_count = LaneCountFromFormat(vform) / 2; |
| 3812 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { |
| 3813 for (int i = lane_count - 1; i >= 0; i--) { |
| 3814 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); |
| 3815 } |
| 3816 } else { |
| 3817 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); |
| 3818 for (int i = lane_count - 1; i >= 0; i--) { |
| 3819 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); |
| 3820 } |
| 3821 } |
| 3822 return dst; |
| 3823 } |
| 3824 |
| 3825 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst, |
| 3826 const LogicVRegister& src) { |
| 3827 dst.ClearForWrite(vform); |
| 3828 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); |
| 3829 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3830 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); |
| 3831 } |
| 3832 return dst; |
| 3833 } |
| 3834 |
| 3835 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst, |
| 3836 const LogicVRegister& src) { |
| 3837 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); |
| 3838 int lane_count = LaneCountFromFormat(vform) / 2; |
| 3839 for (int i = lane_count - 1; i >= 0; i--) { |
| 3840 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); |
| 3841 } |
| 3842 return dst; |
| 3843 } |
| 3844 |
| 3845 // Based on reference C function recip_sqrt_estimate from ARM ARM. |
| 3846 double Simulator::recip_sqrt_estimate(double a) { |
| 3847 int q0, q1, s; |
| 3848 double r; |
| 3849 if (a < 0.5) { |
| 3850 q0 = static_cast<int>(a * 512.0); |
| 3851 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); |
| 3852 } else { |
| 3853 q1 = static_cast<int>(a * 256.0); |
| 3854 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); |
| 3855 } |
| 3856 s = static_cast<int>(256.0 * r + 0.5); |
| 3857 return static_cast<double>(s) / 256.0; |
| 3858 } |
| 3859 |
| 3860 namespace { |
| 3861 |
| 3862 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { |
| 3863 return unsigned_bitextract_64(start_bit, end_bit, val); |
| 3864 } |
| 3865 |
| 3866 } // anonymous namespace |
| 3867 |
| 3868 template <typename T> |
| 3869 T Simulator::FPRecipSqrtEstimate(T op) { |
| 3870 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, |
| 3871 "T must be a float or double"); |
| 3872 |
| 3873 if (std::isnan(op)) { |
| 3874 return FPProcessNaN(op); |
| 3875 } else if (op == 0.0) { |
| 3876 if (copysign(1.0, op) < 0.0) { |
| 3877 return kFP64NegativeInfinity; |
| 3878 } else { |
| 3879 return kFP64PositiveInfinity; |
| 3880 } |
| 3881 } else if (copysign(1.0, op) < 0.0) { |
| 3882 FPProcessException(); |
| 3883 return FPDefaultNaN<T>(); |
| 3884 } else if (std::isinf(op)) { |
| 3885 return 0.0; |
| 3886 } else { |
| 3887 uint64_t fraction; |
| 3888 int32_t exp, result_exp; |
| 3889 |
| 3890 if (sizeof(T) == sizeof(float)) { |
| 3891 exp = static_cast<int32_t>(float_exp(op)); |
| 3892 fraction = float_mantissa(op); |
| 3893 fraction <<= 29; |
| 3894 } else { |
| 3895 exp = static_cast<int32_t>(double_exp(op)); |
| 3896 fraction = double_mantissa(op); |
| 3897 } |
| 3898 |
| 3899 if (exp == 0) { |
| 3900 while (Bits(fraction, 51, 51) == 0) { |
| 3901 fraction = Bits(fraction, 50, 0) << 1; |
| 3902 exp -= 1; |
| 3903 } |
| 3904 fraction = Bits(fraction, 50, 0) << 1; |
| 3905 } |
| 3906 |
| 3907 double scaled; |
| 3908 if (Bits(exp, 0, 0) == 0) { |
| 3909 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); |
| 3910 } else { |
| 3911 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44); |
| 3912 } |
| 3913 |
| 3914 if (sizeof(T) == sizeof(float)) { |
| 3915 result_exp = (380 - exp) / 2; |
| 3916 } else { |
| 3917 result_exp = (3068 - exp) / 2; |
| 3918 } |
| 3919 |
| 3920 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled)); |
| 3921 |
| 3922 if (sizeof(T) == sizeof(float)) { |
| 3923 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); |
| 3924 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); |
| 3925 return float_pack(0, exp_bits, est_bits); |
| 3926 } else { |
| 3927 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); |
| 3928 } |
| 3929 } |
| 3930 } |
| 3931 |
| 3932 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst, |
| 3933 const LogicVRegister& src) { |
| 3934 dst.ClearForWrite(vform); |
| 3935 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 3936 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3937 float input = src.Float<float>(i); |
| 3938 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); |
| 3939 } |
| 3940 } else { |
| 3941 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 3942 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 3943 double input = src.Float<double>(i); |
| 3944 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); |
| 3945 } |
| 3946 } |
| 3947 return dst; |
| 3948 } |
| 3949 |
| 3950 template <typename T> |
| 3951 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { |
| 3952 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, |
| 3953 "T must be a float or double"); |
| 3954 uint32_t sign; |
| 3955 |
| 3956 if (sizeof(T) == sizeof(float)) { |
| 3957 sign = float_sign(op); |
| 3958 } else { |
| 3959 sign = double_sign(op); |
| 3960 } |
| 3961 |
| 3962 if (std::isnan(op)) { |
| 3963 return FPProcessNaN(op); |
| 3964 } else if (std::isinf(op)) { |
| 3965 return (sign == 1) ? -0.0 : 0.0; |
| 3966 } else if (op == 0.0) { |
| 3967 FPProcessException(); // FPExc_DivideByZero exception. |
| 3968 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; |
| 3969 } else if (((sizeof(T) == sizeof(float)) && |
| 3970 (std::fabs(op) < std::pow(2.0, -128.0))) || |
| 3971 ((sizeof(T) == sizeof(double)) && |
| 3972 (std::fabs(op) < std::pow(2.0, -1024.0)))) { |
| 3973 bool overflow_to_inf = false; |
| 3974 switch (rounding) { |
| 3975 case FPTieEven: |
| 3976 overflow_to_inf = true; |
| 3977 break; |
| 3978 case FPPositiveInfinity: |
| 3979 overflow_to_inf = (sign == 0); |
| 3980 break; |
| 3981 case FPNegativeInfinity: |
| 3982 overflow_to_inf = (sign == 1); |
| 3983 break; |
| 3984 case FPZero: |
| 3985 overflow_to_inf = false; |
| 3986 break; |
| 3987 default: |
| 3988 break; |
| 3989 } |
| 3990 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. |
| 3991 if (overflow_to_inf) { |
| 3992 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; |
| 3993 } else { |
| 3994 // Return FPMaxNormal(sign). |
| 3995 if (sizeof(T) == sizeof(float)) { |
| 3996 return float_pack(sign, 0xfe, 0x07fffff); |
| 3997 } else { |
| 3998 return double_pack(sign, 0x7fe, 0x0fffffffffffffl); |
| 3999 } |
| 4000 } |
| 4001 } else { |
| 4002 uint64_t fraction; |
| 4003 int32_t exp, result_exp; |
| 4004 uint32_t sign; |
| 4005 |
| 4006 if (sizeof(T) == sizeof(float)) { |
| 4007 sign = float_sign(op); |
| 4008 exp = static_cast<int32_t>(float_exp(op)); |
| 4009 fraction = float_mantissa(op); |
| 4010 fraction <<= 29; |
| 4011 } else { |
| 4012 sign = double_sign(op); |
| 4013 exp = static_cast<int32_t>(double_exp(op)); |
| 4014 fraction = double_mantissa(op); |
| 4015 } |
| 4016 |
| 4017 if (exp == 0) { |
| 4018 if (Bits(fraction, 51, 51) == 0) { |
| 4019 exp -= 1; |
| 4020 fraction = Bits(fraction, 49, 0) << 2; |
| 4021 } else { |
| 4022 fraction = Bits(fraction, 50, 0) << 1; |
| 4023 } |
| 4024 } |
| 4025 |
| 4026 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); |
| 4027 |
| 4028 if (sizeof(T) == sizeof(float)) { |
| 4029 result_exp = 253 - exp; |
| 4030 } else { |
| 4031 result_exp = 2045 - exp; |
| 4032 } |
| 4033 |
| 4034 double estimate = recip_estimate(scaled); |
| 4035 |
| 4036 fraction = double_mantissa(estimate); |
| 4037 if (result_exp == 0) { |
| 4038 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); |
| 4039 } else if (result_exp == -1) { |
| 4040 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); |
| 4041 result_exp = 0; |
| 4042 } |
| 4043 if (sizeof(T) == sizeof(float)) { |
| 4044 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); |
| 4045 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); |
| 4046 return float_pack(sign, exp_bits, frac_bits); |
| 4047 } else { |
| 4048 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); |
| 4049 } |
| 4050 } |
| 4051 } |
| 4052 |
| 4053 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst, |
| 4054 const LogicVRegister& src, FPRounding round) { |
| 4055 dst.ClearForWrite(vform); |
| 4056 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 4057 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 4058 float input = src.Float<float>(i); |
| 4059 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); |
| 4060 } |
| 4061 } else { |
| 4062 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 4063 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 4064 double input = src.Float<double>(i); |
| 4065 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); |
| 4066 } |
| 4067 } |
| 4068 return dst; |
| 4069 } |
| 4070 |
| 4071 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst, |
| 4072 const LogicVRegister& src) { |
| 4073 dst.ClearForWrite(vform); |
| 4074 uint64_t operand; |
| 4075 uint32_t result; |
| 4076 double dp_operand, dp_result; |
| 4077 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 4078 operand = src.Uint(vform, i); |
| 4079 if (operand <= 0x3FFFFFFF) { |
| 4080 result = 0xFFFFFFFF; |
| 4081 } else { |
| 4082 dp_operand = operand * std::pow(2.0, -32); |
| 4083 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); |
| 4084 result = static_cast<uint32_t>(dp_result); |
| 4085 } |
| 4086 dst.SetUint(vform, i, result); |
| 4087 } |
| 4088 return dst; |
| 4089 } |
| 4090 |
| 4091 // Based on reference C function recip_estimate from ARM ARM. |
| 4092 double Simulator::recip_estimate(double a) { |
| 4093 int q, s; |
| 4094 double r; |
| 4095 q = static_cast<int>(a * 512.0); |
| 4096 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); |
| 4097 s = static_cast<int>(256.0 * r + 0.5); |
| 4098 return static_cast<double>(s) / 256.0; |
| 4099 } |
| 4100 |
| 4101 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst, |
| 4102 const LogicVRegister& src) { |
| 4103 dst.ClearForWrite(vform); |
| 4104 uint64_t operand; |
| 4105 uint32_t result; |
| 4106 double dp_operand, dp_result; |
| 4107 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 4108 operand = src.Uint(vform, i); |
| 4109 if (operand <= 0x7FFFFFFF) { |
| 4110 result = 0xFFFFFFFF; |
| 4111 } else { |
| 4112 dp_operand = operand * std::pow(2.0, -32); |
| 4113 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); |
| 4114 result = static_cast<uint32_t>(dp_result); |
| 4115 } |
| 4116 dst.SetUint(vform, i, result); |
| 4117 } |
| 4118 return dst; |
| 4119 } |
| 4120 |
| 4121 template <typename T> |
| 4122 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, |
| 4123 const LogicVRegister& src) { |
| 4124 dst.ClearForWrite(vform); |
| 4125 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 4126 T op = src.Float<T>(i); |
| 4127 T result; |
| 4128 if (std::isnan(op)) { |
| 4129 result = FPProcessNaN(op); |
| 4130 } else { |
| 4131 int exp; |
| 4132 uint32_t sign; |
| 4133 if (sizeof(T) == sizeof(float)) { |
| 4134 sign = float_sign(op); |
| 4135 exp = static_cast<int>(float_exp(op)); |
| 4136 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); |
| 4137 result = float_pack(sign, exp, 0); |
| 4138 } else { |
| 4139 sign = double_sign(op); |
| 4140 exp = static_cast<int>(double_exp(op)); |
| 4141 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); |
| 4142 result = double_pack(sign, exp, 0); |
| 4143 } |
| 4144 } |
| 4145 dst.SetFloat(i, result); |
| 4146 } |
| 4147 return dst; |
| 4148 } |
| 4149 |
| 4150 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, |
| 4151 const LogicVRegister& src) { |
| 4152 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 4153 frecpx<float>(vform, dst, src); |
| 4154 } else { |
| 4155 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 4156 frecpx<double>(vform, dst, src); |
| 4157 } |
| 4158 return dst; |
| 4159 } |
| 4160 |
| 4161 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, |
| 4162 const LogicVRegister& src, int fbits, |
| 4163 FPRounding round) { |
| 4164 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 4165 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 4166 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); |
| 4167 dst.SetFloat<float>(i, result); |
| 4168 } else { |
| 4169 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 4170 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); |
| 4171 dst.SetFloat<double>(i, result); |
| 4172 } |
| 4173 } |
| 4174 return dst; |
| 4175 } |
| 4176 |
| 4177 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, |
| 4178 const LogicVRegister& src, int fbits, |
| 4179 FPRounding round) { |
| 4180 for (int i = 0; i < LaneCountFromFormat(vform); i++) { |
| 4181 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { |
| 4182 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); |
| 4183 dst.SetFloat<float>(i, result); |
| 4184 } else { |
| 4185 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); |
| 4186 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); |
| 4187 dst.SetFloat<double>(i, result); |
| 4188 } |
| 4189 } |
| 4190 return dst; |
| 4191 } |
| 4192 |
| 4193 #endif // USE_SIMULATOR |
| 4194 |
| 4195 } // namespace internal |
| 4196 } // namespace v8 |
| 4197 |
| 4198 #endif // V8_TARGET_ARCH_ARM64 |
OLD | NEW |