src/arm64/simulator-logic-arm64.cc - Issue 2812573003: Reland "ARM64: Add NEON support"

Side by Side Diff: src/arm64/simulator-logic-arm64.cc

Issue 2812573003: Reland "ARM64: Add NEON support" (Closed)

Patch Set: Add trace directory to gitignore Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright 2016 the V8 project authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #if V8_TARGET_ARCH_ARM64

	6

	7 #include <cmath>

	8 #include "src/arm64/simulator-arm64.h"

	9

	10 namespace v8 {

	11 namespace internal {

	12

	13 #if defined(USE_SIMULATOR)

	14

	15 namespace {

	16

	17 // See FPRound for a description of this function.

	18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,

	19 FPRounding round_mode) {

	20 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(

	21 sign, exponent, mantissa, round_mode);

	22 return bit_cast<double>(bits);

	23 }

	24

	25 // See FPRound for a description of this function.

	26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,

	27 FPRounding round_mode) {

	28 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(

	29 sign, exponent, mantissa, round_mode);

	30 return bit_cast<float>(bits);

	31 }

	32

	33 // See FPRound for a description of this function.

	34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,

	35 uint64_t mantissa, FPRounding round_mode) {

	36 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(

	37 sign, exponent, mantissa, round_mode);

	38 }

	39

	40 } // namespace

	41

	42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {

	43 if (src >= 0) {

	44 return UFixedToDouble(src, fbits, round);

	45 } else if (src == INT64_MIN) {

	46 return -UFixedToDouble(src, fbits, round);

	47 } else {

	48 return -UFixedToDouble(-src, fbits, round);

	49 }

	50 }

	51

	52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {

	53 // An input of 0 is a special case because the result is effectively

	54 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.

	55 if (src == 0) {

	56 return 0.0;

	57 }

	58

	59 // Calculate the exponent. The highest significant bit will have the value

	60 // 2^exponent.

	61 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);

	62 const int64_t exponent = highest_significant_bit - fbits;

	63

	64 return FPRoundToDouble(0, exponent, src, round);

	65 }

	66

	67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {

	68 if (src >= 0) {

	69 return UFixedToFloat(src, fbits, round);

	70 } else if (src == INT64_MIN) {

	71 return -UFixedToFloat(src, fbits, round);

	72 } else {

	73 return -UFixedToFloat(-src, fbits, round);

	74 }

	75 }

	76

	77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {

	78 // An input of 0 is a special case because the result is effectively

	79 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.

	80 if (src == 0) {

	81 return 0.0f;

	82 }

	83

	84 // Calculate the exponent. The highest significant bit will have the value

	85 // 2^exponent.

	86 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);

	87 const int32_t exponent = highest_significant_bit - fbits;

	88

	89 return FPRoundToFloat(0, exponent, src, round);

	90 }

	91

	92 double Simulator::FPToDouble(float value) {

	93 switch (std::fpclassify(value)) {

	94 case FP_NAN: {

	95 if (IsSignallingNaN(value)) {

	96 FPProcessException();

	97 }

	98 if (DN()) return kFP64DefaultNaN;

	99

	100 // Convert NaNs as the processor would:

	101 // - The sign is propagated.

	102 // - The mantissa is transferred entirely, except that the top bit is

	103 // forced to '1', making the result a quiet NaN. The unused (low-order)

	104 // mantissa bits are set to 0.

	105 uint32_t raw = bit_cast<uint32_t>(value);

	106

	107 uint64_t sign = raw >> 31;

	108 uint64_t exponent = (1 << kDoubleExponentBits) - 1;

	109 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);

	110

	111 // Unused low-order bits remain zero.

	112 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);

	113

	114 // Force a quiet NaN.

	115 mantissa \|= (UINT64_C(1) << (kDoubleMantissaBits - 1));

	116

	117 return double_pack(sign, exponent, mantissa);

	118 }

	119

	120 case FP_ZERO:

	121 case FP_NORMAL:

	122 case FP_SUBNORMAL:

	123 case FP_INFINITE: {

	124 // All other inputs are preserved in a standard cast, because every value

	125 // representable using an IEEE-754 float is also representable using an

	126 // IEEE-754 double.

	127 return static_cast<double>(value);

	128 }

	129 }

	130

	131 UNREACHABLE();

	132 return kFP64DefaultNaN;

	133 }

	134

	135 float Simulator::FPToFloat(float16 value) {

	136 uint32_t sign = value >> 15;

	137 uint32_t exponent =

	138 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,

	139 kFloat16MantissaBits, value);

	140 uint32_t mantissa =

	141 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);

	142

	143 switch (float16classify(value)) {

	144 case FP_ZERO:

	145 return (sign == 0) ? 0.0f : -0.0f;

	146

	147 case FP_INFINITE:

	148 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;

	149

	150 case FP_SUBNORMAL: {

	151 // Calculate shift required to put mantissa into the most-significant bits

	152 // of the destination mantissa.

	153 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);

	154

	155 // Shift mantissa and discard implicit '1'.

	156 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;

	157 mantissa &= (1 << kFloatMantissaBits) - 1;

	158

	159 // Adjust the exponent for the shift applied, and rebias.

	160 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);

	161 break;

	162 }

	163

	164 case FP_NAN: {

	165 if (IsSignallingNaN(value)) {

	166 FPProcessException();

	167 }

	168 if (DN()) return kFP32DefaultNaN;

	169

	170 // Convert NaNs as the processor would:

	171 // - The sign is propagated.

	172 // - The mantissa is transferred entirely, except that the top bit is

	173 // forced to '1', making the result a quiet NaN. The unused (low-order)

	174 // mantissa bits are set to 0.

	175 exponent = (1 << kFloatExponentBits) - 1;

	176

	177 // Increase bits in mantissa, making low-order bits 0.

	178 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

	179 mantissa \|= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.

	180 break;

	181 }

	182

	183 case FP_NORMAL: {

	184 // Increase bits in mantissa, making low-order bits 0.

	185 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

	186

	187 // Change exponent bias.

	188 exponent += (kFloatExponentBias - kFloat16ExponentBias);

	189 break;

	190 }

	191

	192 default:

	193 UNREACHABLE();

	194 return kFP32DefaultNaN;

	195 }

	196 return float_pack(sign, exponent, mantissa);

	197 }

	198

	199 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {

	200 // Only the FPTieEven rounding mode is implemented.

	201 DCHECK_EQ(round_mode, FPTieEven);

	202 USE(round_mode);

	203

	204 int64_t sign = float_sign(value);

	205 int64_t exponent =

	206 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;

	207 uint32_t mantissa = float_mantissa(value);

	208

	209 switch (std::fpclassify(value)) {

	210 case FP_NAN: {

	211 if (IsSignallingNaN(value)) {

	212 FPProcessException();

	213 }

	214 if (DN()) return kFP16DefaultNaN;

	215

	216 // Convert NaNs as the processor would:

	217 // - The sign is propagated.

	218 // - The mantissa is transferred as much as possible, except that the top

	219 // bit is forced to '1', making the result a quiet NaN.

	220 float16 result =

	221 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	222 result \|= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);

	223 result \|= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;

	224 return result;

	225 }

	226

	227 case FP_ZERO:

	228 return (sign == 0) ? 0 : 0x8000;

	229

	230 case FP_INFINITE:

	231 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	232

	233 case FP_NORMAL:

	234 case FP_SUBNORMAL: {

	235 // Convert float-to-half as the processor would, assuming that FPCR.FZ

	236 // (flush-to-zero) is not set.

	237

	238 // Add the implicit '1' bit to the mantissa.

	239 mantissa += (1 << kFloatMantissaBits);

	240 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);

	241 }

	242 }

	243

	244 UNREACHABLE();

	245 return kFP16DefaultNaN;

	246 }

	247

	248 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {

	249 // Only the FPTieEven rounding mode is implemented.

	250 DCHECK_EQ(round_mode, FPTieEven);

	251 USE(round_mode);

	252

	253 int64_t sign = double_sign(value);

	254 int64_t exponent =

	255 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;

	256 uint64_t mantissa = double_mantissa(value);

	257

	258 switch (std::fpclassify(value)) {

	259 case FP_NAN: {

	260 if (IsSignallingNaN(value)) {

	261 FPProcessException();

	262 }

	263 if (DN()) return kFP16DefaultNaN;

	264

	265 // Convert NaNs as the processor would:

	266 // - The sign is propagated.

	267 // - The mantissa is transferred as much as possible, except that the top

	268 // bit is forced to '1', making the result a quiet NaN.

	269 float16 result =

	270 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	271 result \|= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);

	272 result \|= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;

	273 return result;

	274 }

	275

	276 case FP_ZERO:

	277 return (sign == 0) ? 0 : 0x8000;

	278

	279 case FP_INFINITE:

	280 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	281

	282 case FP_NORMAL:

	283 case FP_SUBNORMAL: {

	284 // Convert double-to-half as the processor would, assuming that FPCR.FZ

	285 // (flush-to-zero) is not set.

	286

	287 // Add the implicit '1' bit to the mantissa.

	288 mantissa += (UINT64_C(1) << kDoubleMantissaBits);

	289 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);

	290 }

	291 }

	292

	293 UNREACHABLE();

	294 return kFP16DefaultNaN;

	295 }

	296

	297 float Simulator::FPToFloat(double value, FPRounding round_mode) {

	298 // Only the FPTieEven rounding mode is implemented.

	299 DCHECK((round_mode == FPTieEven) \|\| (round_mode == FPRoundOdd));

	300 USE(round_mode);

	301

	302 switch (std::fpclassify(value)) {

	303 case FP_NAN: {

	304 if (IsSignallingNaN(value)) {

	305 FPProcessException();

	306 }

	307 if (DN()) return kFP32DefaultNaN;

	308

	309 // Convert NaNs as the processor would:

	310 // - The sign is propagated.

	311 // - The mantissa is transferred as much as possible, except that the

	312 // top bit is forced to '1', making the result a quiet NaN.

	313

	314 uint64_t raw = bit_cast<uint64_t>(value);

	315

	316 uint32_t sign = raw >> 63;

	317 uint32_t exponent = (1 << 8) - 1;

	318 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(

	319 50, kDoubleMantissaBits - kFloatMantissaBits, raw));

	320 mantissa \|= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.

	321

	322 return float_pack(sign, exponent, mantissa);

	323 }

	324

	325 case FP_ZERO:

	326 case FP_INFINITE: {

	327 // In a C++ cast, any value representable in the target type will be

	328 // unchanged. This is always the case for +/-0.0 and infinities.

	329 return static_cast<float>(value);

	330 }

	331

	332 case FP_NORMAL:

	333 case FP_SUBNORMAL: {

	334 // Convert double-to-float as the processor would, assuming that FPCR.FZ

	335 // (flush-to-zero) is not set.

	336 uint32_t sign = double_sign(value);

	337 int64_t exponent =

	338 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;

	339 uint64_t mantissa = double_mantissa(value);

	340 if (std::fpclassify(value) == FP_NORMAL) {

	341 // For normal FP values, add the hidden bit.

	342 mantissa \|= (UINT64_C(1) << kDoubleMantissaBits);

	343 }

	344 return FPRoundToFloat(sign, exponent, mantissa, round_mode);

	345 }

	346 }

	347

	348 UNREACHABLE();

	349 return kFP32DefaultNaN;

	350 }

	351

	352 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {

	353 dst.ClearForWrite(vform);

	354 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	355 dst.ReadUintFromMem(vform, i, addr);

	356 addr += LaneSizeInBytesFromFormat(vform);

	357 }

	358 }

	359

	360 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,

	361 uint64_t addr) {

	362 dst.ReadUintFromMem(vform, index, addr);

	363 }

	364

	365 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {

	366 dst.ClearForWrite(vform);

	367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	368 dst.ReadUintFromMem(vform, i, addr);

	369 }

	370 }

	371

	372 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,

	373 LogicVRegister dst2, uint64_t addr1) {

	374 dst1.ClearForWrite(vform);

	375 dst2.ClearForWrite(vform);

	376 int esize = LaneSizeInBytesFromFormat(vform);

	377 uint64_t addr2 = addr1 + esize;

	378 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	379 dst1.ReadUintFromMem(vform, i, addr1);

	380 dst2.ReadUintFromMem(vform, i, addr2);

	381 addr1 += 2 * esize;

	382 addr2 += 2 * esize;

	383 }

	384 }

	385

	386 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,

	387 LogicVRegister dst2, int index, uint64_t addr1) {

	388 dst1.ClearForWrite(vform);

	389 dst2.ClearForWrite(vform);

	390 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	391 dst1.ReadUintFromMem(vform, index, addr1);

	392 dst2.ReadUintFromMem(vform, index, addr2);

	393 }

	394

	395 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,

	396 LogicVRegister dst2, uint64_t addr) {

	397 dst1.ClearForWrite(vform);

	398 dst2.ClearForWrite(vform);

	399 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	400 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	401 dst1.ReadUintFromMem(vform, i, addr);

	402 dst2.ReadUintFromMem(vform, i, addr2);

	403 }

	404 }

	405

	406 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,

	407 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {

	408 dst1.ClearForWrite(vform);

	409 dst2.ClearForWrite(vform);

	410 dst3.ClearForWrite(vform);

	411 int esize = LaneSizeInBytesFromFormat(vform);

	412 uint64_t addr2 = addr1 + esize;

	413 uint64_t addr3 = addr2 + esize;

	414 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	415 dst1.ReadUintFromMem(vform, i, addr1);

	416 dst2.ReadUintFromMem(vform, i, addr2);

	417 dst3.ReadUintFromMem(vform, i, addr3);

	418 addr1 += 3 * esize;

	419 addr2 += 3 * esize;

	420 addr3 += 3 * esize;

	421 }

	422 }

	423

	424 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,

	425 LogicVRegister dst2, LogicVRegister dst3, int index,

	426 uint64_t addr1) {

	427 dst1.ClearForWrite(vform);

	428 dst2.ClearForWrite(vform);

	429 dst3.ClearForWrite(vform);

	430 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	431 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	432 dst1.ReadUintFromMem(vform, index, addr1);

	433 dst2.ReadUintFromMem(vform, index, addr2);

	434 dst3.ReadUintFromMem(vform, index, addr3);

	435 }

	436

	437 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,

	438 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {

	439 dst1.ClearForWrite(vform);

	440 dst2.ClearForWrite(vform);

	441 dst3.ClearForWrite(vform);

	442 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	443 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	444 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	445 dst1.ReadUintFromMem(vform, i, addr);

	446 dst2.ReadUintFromMem(vform, i, addr2);

	447 dst3.ReadUintFromMem(vform, i, addr3);

	448 }

	449 }

	450

	451 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,

	452 LogicVRegister dst2, LogicVRegister dst3,

	453 LogicVRegister dst4, uint64_t addr1) {

	454 dst1.ClearForWrite(vform);

	455 dst2.ClearForWrite(vform);

	456 dst3.ClearForWrite(vform);

	457 dst4.ClearForWrite(vform);

	458 int esize = LaneSizeInBytesFromFormat(vform);

	459 uint64_t addr2 = addr1 + esize;

	460 uint64_t addr3 = addr2 + esize;

	461 uint64_t addr4 = addr3 + esize;

	462 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	463 dst1.ReadUintFromMem(vform, i, addr1);

	464 dst2.ReadUintFromMem(vform, i, addr2);

	465 dst3.ReadUintFromMem(vform, i, addr3);

	466 dst4.ReadUintFromMem(vform, i, addr4);

	467 addr1 += 4 * esize;

	468 addr2 += 4 * esize;

	469 addr3 += 4 * esize;

	470 addr4 += 4 * esize;

	471 }

	472 }

	473

	474 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,

	475 LogicVRegister dst2, LogicVRegister dst3,

	476 LogicVRegister dst4, int index, uint64_t addr1) {

	477 dst1.ClearForWrite(vform);

	478 dst2.ClearForWrite(vform);

	479 dst3.ClearForWrite(vform);

	480 dst4.ClearForWrite(vform);

	481 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	482 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	483 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);

	484 dst1.ReadUintFromMem(vform, index, addr1);

	485 dst2.ReadUintFromMem(vform, index, addr2);

	486 dst3.ReadUintFromMem(vform, index, addr3);

	487 dst4.ReadUintFromMem(vform, index, addr4);

	488 }

	489

	490 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,

	491 LogicVRegister dst2, LogicVRegister dst3,

	492 LogicVRegister dst4, uint64_t addr) {

	493 dst1.ClearForWrite(vform);

	494 dst2.ClearForWrite(vform);

	495 dst3.ClearForWrite(vform);

	496 dst4.ClearForWrite(vform);

	497 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	498 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	499 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);

	500 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	501 dst1.ReadUintFromMem(vform, i, addr);

	502 dst2.ReadUintFromMem(vform, i, addr2);

	503 dst3.ReadUintFromMem(vform, i, addr3);

	504 dst4.ReadUintFromMem(vform, i, addr4);

	505 }

	506 }

	507

	508 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {

	509 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	510 src.WriteUintToMem(vform, i, addr);

	511 addr += LaneSizeInBytesFromFormat(vform);

	512 }

	513 }

	514

	515 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,

	516 uint64_t addr) {

	517 src.WriteUintToMem(vform, index, addr);

	518 }

	519

	520 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	521 uint64_t addr) {

	522 int esize = LaneSizeInBytesFromFormat(vform);

	523 uint64_t addr2 = addr + esize;

	524 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	525 dst.WriteUintToMem(vform, i, addr);

	526 dst2.WriteUintToMem(vform, i, addr2);

	527 addr += 2 * esize;

	528 addr2 += 2 * esize;

	529 }

	530 }

	531

	532 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	533 int index, uint64_t addr) {

	534 int esize = LaneSizeInBytesFromFormat(vform);

	535 dst.WriteUintToMem(vform, index, addr);

	536 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	537 }

	538

	539 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	540 LogicVRegister dst3, uint64_t addr) {

	541 int esize = LaneSizeInBytesFromFormat(vform);

	542 uint64_t addr2 = addr + esize;

	543 uint64_t addr3 = addr2 + esize;

	544 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	545 dst.WriteUintToMem(vform, i, addr);

	546 dst2.WriteUintToMem(vform, i, addr2);

	547 dst3.WriteUintToMem(vform, i, addr3);

	548 addr += 3 * esize;

	549 addr2 += 3 * esize;

	550 addr3 += 3 * esize;

	551 }

	552 }

	553

	554 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	555 LogicVRegister dst3, int index, uint64_t addr) {

	556 int esize = LaneSizeInBytesFromFormat(vform);

	557 dst.WriteUintToMem(vform, index, addr);

	558 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	559 dst3.WriteUintToMem(vform, index, addr + 2 * esize);

	560 }

	561

	562 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	563 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {

	564 int esize = LaneSizeInBytesFromFormat(vform);

	565 uint64_t addr2 = addr + esize;

	566 uint64_t addr3 = addr2 + esize;

	567 uint64_t addr4 = addr3 + esize;

	568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	569 dst.WriteUintToMem(vform, i, addr);

	570 dst2.WriteUintToMem(vform, i, addr2);

	571 dst3.WriteUintToMem(vform, i, addr3);

	572 dst4.WriteUintToMem(vform, i, addr4);

	573 addr += 4 * esize;

	574 addr2 += 4 * esize;

	575 addr3 += 4 * esize;

	576 addr4 += 4 * esize;

	577 }

	578 }

	579

	580 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	581 LogicVRegister dst3, LogicVRegister dst4, int index,

	582 uint64_t addr) {

	583 int esize = LaneSizeInBytesFromFormat(vform);

	584 dst.WriteUintToMem(vform, index, addr);

	585 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	586 dst3.WriteUintToMem(vform, index, addr + 2 * esize);

	587 dst4.WriteUintToMem(vform, index, addr + 3 * esize);

	588 }

	589

	590 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,

	591 const LogicVRegister& src1,

	592 const LogicVRegister& src2, Condition cond) {

	593 dst.ClearForWrite(vform);

	594 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	595 int64_t sa = src1.Int(vform, i);

	596 int64_t sb = src2.Int(vform, i);

	597 uint64_t ua = src1.Uint(vform, i);

	598 uint64_t ub = src2.Uint(vform, i);

	599 bool result = false;

	600 switch (cond) {

	601 case eq:

	602 result = (ua == ub);

	603 break;

	604 case ge:

	605 result = (sa >= sb);

	606 break;

	607 case gt:

	608 result = (sa > sb);

	609 break;

	610 case hi:

	611 result = (ua > ub);

	612 break;

	613 case hs:

	614 result = (ua >= ub);

	615 break;

	616 case lt:

	617 result = (sa < sb);

	618 break;

	619 case le:

	620 result = (sa <= sb);

	621 break;

	622 default:

	623 UNREACHABLE();

	624 break;

	625 }

	626 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);

	627 }

	628 return dst;

	629 }

	630

	631 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,

	632 const LogicVRegister& src1, int imm,

	633 Condition cond) {

	634 SimVRegister temp;

	635 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);

	636 return cmp(vform, dst, src1, imm_reg, cond);

	637 }

	638

	639 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,

	640 const LogicVRegister& src1,

	641 const LogicVRegister& src2) {

	642 dst.ClearForWrite(vform);

	643 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	644 uint64_t ua = src1.Uint(vform, i);

	645 uint64_t ub = src2.Uint(vform, i);

	646 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);

	647 }

	648 return dst;

	649 }

	650

	651 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,

	652 const LogicVRegister& src1,

	653 const LogicVRegister& src2) {

	654 int lane_size = LaneSizeInBitsFromFormat(vform);

	655 dst.ClearForWrite(vform);

	656 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	657 // Test for unsigned saturation.

	658 uint64_t ua = src1.UintLeftJustified(vform, i);

	659 uint64_t ub = src2.UintLeftJustified(vform, i);

	660 uint64_t ur = ua + ub;

	661 if (ur < ua) {

	662 dst.SetUnsignedSat(i, true);

	663 }

	664

	665 // Test for signed saturation.

	666 bool pos_a = (ua >> 63) == 0;

	667 bool pos_b = (ub >> 63) == 0;

	668 bool pos_r = (ur >> 63) == 0;

	669 // If the signs of the operands are the same, but different from the result,

	670 // there was an overflow.

	671 if ((pos_a == pos_b) && (pos_a != pos_r)) {

	672 dst.SetSignedSat(i, pos_a);

	673 }

	674

	675 dst.SetInt(vform, i, ur >> (64 - lane_size));

	676 }

	677 return dst;

	678 }

	679

	680 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,

	681 const LogicVRegister& src1,

	682 const LogicVRegister& src2) {

	683 SimVRegister temp1, temp2;

	684 uzp1(vform, temp1, src1, src2);

	685 uzp2(vform, temp2, src1, src2);

	686 add(vform, dst, temp1, temp2);

	687 return dst;

	688 }

	689

	690 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,

	691 const LogicVRegister& src1,

	692 const LogicVRegister& src2) {

	693 SimVRegister temp;

	694 mul(vform, temp, src1, src2);

	695 add(vform, dst, dst, temp);

	696 return dst;

	697 }

	698

	699 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,

	700 const LogicVRegister& src1,

	701 const LogicVRegister& src2) {

	702 SimVRegister temp;

	703 mul(vform, temp, src1, src2);

	704 sub(vform, dst, dst, temp);

	705 return dst;

	706 }

	707

	708 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,

	709 const LogicVRegister& src1,

	710 const LogicVRegister& src2) {

	711 dst.ClearForWrite(vform);

	712 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	713 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));

	714 }

	715 return dst;

	716 }

	717

	718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,

	719 const LogicVRegister& src1,

	720 const LogicVRegister& src2, int index) {

	721 SimVRegister temp;

	722 VectorFormat indexform = VectorFormatFillQ(vform);

	723 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));

	724 }

	725

	726 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,

	727 const LogicVRegister& src1,

	728 const LogicVRegister& src2, int index) {

	729 SimVRegister temp;

	730 VectorFormat indexform = VectorFormatFillQ(vform);

	731 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));

	732 }

	733

	734 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,

	735 const LogicVRegister& src1,

	736 const LogicVRegister& src2, int index) {

	737 SimVRegister temp;

	738 VectorFormat indexform = VectorFormatFillQ(vform);

	739 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));

	740 }

	741

	742 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,

	743 const LogicVRegister& src1,

	744 const LogicVRegister& src2, int index) {

	745 SimVRegister temp;

	746 VectorFormat indexform =

	747 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	748 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	749 }

	750

	751 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,

	752 const LogicVRegister& src1,

	753 const LogicVRegister& src2, int index) {

	754 SimVRegister temp;

	755 VectorFormat indexform =

	756 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	757 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	758 }

	759

	760 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,

	761 const LogicVRegister& src1,

	762 const LogicVRegister& src2, int index) {

	763 SimVRegister temp;

	764 VectorFormat indexform =

	765 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	766 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	767 }

	768

	769 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,

	770 const LogicVRegister& src1,

	771 const LogicVRegister& src2, int index) {

	772 SimVRegister temp;

	773 VectorFormat indexform =

	774 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	775 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	776 }

	777

	778 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,

	779 const LogicVRegister& src1,

	780 const LogicVRegister& src2, int index) {

	781 SimVRegister temp;

	782 VectorFormat indexform =

	783 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	784 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	785 }

	786

	787 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,

	788 const LogicVRegister& src1,

	789 const LogicVRegister& src2, int index) {

	790 SimVRegister temp;

	791 VectorFormat indexform =

	792 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	793 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	794 }

	795

	796 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,

	797 const LogicVRegister& src1,

	798 const LogicVRegister& src2, int index) {

	799 SimVRegister temp;

	800 VectorFormat indexform =

	801 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	802 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	803 }

	804

	805 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,

	806 const LogicVRegister& src1,

	807 const LogicVRegister& src2, int index) {

	808 SimVRegister temp;

	809 VectorFormat indexform =

	810 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	811 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	812 }

	813

	814 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,

	815 const LogicVRegister& src1,

	816 const LogicVRegister& src2, int index) {

	817 SimVRegister temp;

	818 VectorFormat indexform =

	819 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	820 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	821 }

	822

	823 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,

	824 const LogicVRegister& src1,

	825 const LogicVRegister& src2, int index) {

	826 SimVRegister temp;

	827 VectorFormat indexform =

	828 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	829 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	830 }

	831

	832 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,

	833 const LogicVRegister& src1,

	834 const LogicVRegister& src2, int index) {

	835 SimVRegister temp;

	836 VectorFormat indexform =

	837 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	838 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	839 }

	840

	841 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,

	842 const LogicVRegister& src1,

	843 const LogicVRegister& src2, int index) {

	844 SimVRegister temp;

	845 VectorFormat indexform =

	846 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	847 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	848 }

	849

	850 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,

	851 const LogicVRegister& src1,

	852 const LogicVRegister& src2, int index) {

	853 SimVRegister temp;

	854 VectorFormat indexform =

	855 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	856 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	857 }

	858

	859 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,

	860 const LogicVRegister& src1,

	861 const LogicVRegister& src2, int index) {

	862 SimVRegister temp;

	863 VectorFormat indexform =

	864 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	865 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	866 }

	867

	868 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,

	869 const LogicVRegister& src1,

	870 const LogicVRegister& src2, int index) {

	871 SimVRegister temp;

	872 VectorFormat indexform =

	873 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	874 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	875 }

	876

	877 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,

	878 const LogicVRegister& src1,

	879 const LogicVRegister& src2, int index) {

	880 SimVRegister temp;

	881 VectorFormat indexform =

	882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	883 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	884 }

	885

	886 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,

	887 const LogicVRegister& src1,

	888 const LogicVRegister& src2, int index) {

	889 SimVRegister temp;

	890 VectorFormat indexform =

	891 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	892 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	893 }

	894

	895 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,

	896 const LogicVRegister& src1,

	897 const LogicVRegister& src2, int index) {

	898 SimVRegister temp;

	899 VectorFormat indexform =

	900 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	901 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	902 }

	903

	904 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,

	905 const LogicVRegister& src1,

	906 const LogicVRegister& src2, int index) {

	907 SimVRegister temp;

	908 VectorFormat indexform = VectorFormatFillQ(vform);

	909 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));

	910 }

	911

	912 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,

	913 const LogicVRegister& src1,

	914 const LogicVRegister& src2, int index) {

	915 SimVRegister temp;

	916 VectorFormat indexform = VectorFormatFillQ(vform);

	917 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));

	918 }

	919

	920 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {

	921 uint16_t result = 0;

	922 uint16_t extended_op2 = op2;

	923 for (int i = 0; i < 8; ++i) {

	924 if ((op1 >> i) & 1) {

	925 result = result ^ (extended_op2 << i);

	926 }

	927 }

	928 return result;

	929 }

	930

	931 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,

	932 const LogicVRegister& src1,

	933 const LogicVRegister& src2) {

	934 dst.ClearForWrite(vform);

	935 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	936 dst.SetUint(vform, i,

	937 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));

	938 }

	939 return dst;

	940 }

	941

	942 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,

	943 const LogicVRegister& src1,

	944 const LogicVRegister& src2) {

	945 VectorFormat vform_src = VectorFormatHalfWidth(vform);

	946 dst.ClearForWrite(vform);

	947 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	948 dst.SetUint(

	949 vform, i,

	950 PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));

	951 }

	952 return dst;

	953 }

	954

	955 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,

	956 const LogicVRegister& src1,

	957 const LogicVRegister& src2) {

	958 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);

	959 dst.ClearForWrite(vform);

	960 int lane_count = LaneCountFromFormat(vform);

	961 for (int i = 0; i < lane_count; i++) {

	962 dst.SetUint(vform, i,

	963 PolynomialMult(src1.Uint(vform_src, lane_count + i),

	964 src2.Uint(vform_src, lane_count + i)));

	965 }

	966 return dst;

	967 }

	968

	969 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,

	970 const LogicVRegister& src1,

	971 const LogicVRegister& src2) {

	972 int lane_size = LaneSizeInBitsFromFormat(vform);

	973 dst.ClearForWrite(vform);

	974 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	975 // Test for unsigned saturation.

	976 uint64_t ua = src1.UintLeftJustified(vform, i);

	977 uint64_t ub = src2.UintLeftJustified(vform, i);

	978 uint64_t ur = ua - ub;

	979 if (ub > ua) {

	980 dst.SetUnsignedSat(i, false);

	981 }

	982

	983 // Test for signed saturation.

	984 bool pos_a = (ua >> 63) == 0;

	985 bool pos_b = (ub >> 63) == 0;

	986 bool pos_r = (ur >> 63) == 0;

	987 // If the signs of the operands are different, and the sign of the first

	988 // operand doesn't match the result, there was an overflow.

	989 if ((pos_a != pos_b) && (pos_a != pos_r)) {

	990 dst.SetSignedSat(i, pos_a);

	991 }

	992

	993 dst.SetInt(vform, i, ur >> (64 - lane_size));

	994 }

	995 return dst;

	996 }

	997

	998 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,

	999 const LogicVRegister& src1,

	1000 const LogicVRegister& src2) {

	1001 dst.ClearForWrite(vform);

	1002 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1003 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));

	1004 }

	1005 return dst;

	1006 }

	1007

	1008 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,

	1009 const LogicVRegister& src1,

	1010 const LogicVRegister& src2) {

	1011 dst.ClearForWrite(vform);

	1012 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1013 dst.SetUint(vform, i, src1.Uint(vform, i) \| src2.Uint(vform, i));

	1014 }

	1015 return dst;

	1016 }

	1017

	1018 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,

	1019 const LogicVRegister& src1,

	1020 const LogicVRegister& src2) {

	1021 dst.ClearForWrite(vform);

	1022 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1023 dst.SetUint(vform, i, src1.Uint(vform, i) \| ~src2.Uint(vform, i));

	1024 }

	1025 return dst;

	1026 }

	1027

	1028 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,

	1029 const LogicVRegister& src1,

	1030 const LogicVRegister& src2) {

	1031 dst.ClearForWrite(vform);

	1032 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1033 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));

	1034 }

	1035 return dst;

	1036 }

	1037

	1038 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,

	1039 const LogicVRegister& src1,

	1040 const LogicVRegister& src2) {

	1041 dst.ClearForWrite(vform);

	1042 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1043 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));

	1044 }

	1045 return dst;

	1046 }

	1047

	1048 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,

	1049 const LogicVRegister& src, uint64_t imm) {

	1050 uint64_t result[16];

	1051 int laneCount = LaneCountFromFormat(vform);

	1052 for (int i = 0; i < laneCount; ++i) {

	1053 result[i] = src.Uint(vform, i) & ~imm;

	1054 }

	1055 dst.SetUintArray(vform, result);

	1056 return dst;

	1057 }

	1058

	1059 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,

	1060 const LogicVRegister& src1,

	1061 const LogicVRegister& src2) {

	1062 dst.ClearForWrite(vform);

	1063 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1064 uint64_t operand1 = dst.Uint(vform, i);

	1065 uint64_t operand2 = ~src2.Uint(vform, i);

	1066 uint64_t operand3 = src1.Uint(vform, i);

	1067 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1068 dst.SetUint(vform, i, result);

	1069 }

	1070 return dst;

	1071 }

	1072

	1073 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,

	1074 const LogicVRegister& src1,

	1075 const LogicVRegister& src2) {

	1076 dst.ClearForWrite(vform);

	1077 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1078 uint64_t operand1 = dst.Uint(vform, i);

	1079 uint64_t operand2 = src2.Uint(vform, i);

	1080 uint64_t operand3 = src1.Uint(vform, i);

	1081 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1082 dst.SetUint(vform, i, result);

	1083 }

	1084 return dst;

	1085 }

	1086

	1087 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,

	1088 const LogicVRegister& src1,

	1089 const LogicVRegister& src2) {

	1090 dst.ClearForWrite(vform);

	1091 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1092 uint64_t operand1 = src2.Uint(vform, i);

	1093 uint64_t operand2 = dst.Uint(vform, i);

	1094 uint64_t operand3 = src1.Uint(vform, i);

	1095 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1096 dst.SetUint(vform, i, result);

	1097 }

	1098 return dst;

	1099 }

	1100

	1101 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,

	1102 const LogicVRegister& src1,

	1103 const LogicVRegister& src2, bool max) {

	1104 dst.ClearForWrite(vform);

	1105 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1106 int64_t src1_val = src1.Int(vform, i);

	1107 int64_t src2_val = src2.Int(vform, i);

	1108 int64_t dst_val;

	1109 if (max) {

	1110 dst_val = (src1_val > src2_val) ? src1_val : src2_val;

	1111 } else {

	1112 dst_val = (src1_val < src2_val) ? src1_val : src2_val;

	1113 }

	1114 dst.SetInt(vform, i, dst_val);

	1115 }

	1116 return dst;

	1117 }

	1118

	1119 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,

	1120 const LogicVRegister& src1,

	1121 const LogicVRegister& src2) {

	1122 return SMinMax(vform, dst, src1, src2, true);

	1123 }

	1124

	1125 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,

	1126 const LogicVRegister& src1,

	1127 const LogicVRegister& src2) {

	1128 return SMinMax(vform, dst, src1, src2, false);

	1129 }

	1130

	1131 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,

	1132 const LogicVRegister& src1,

	1133 const LogicVRegister& src2, bool max) {

	1134 int lanes = LaneCountFromFormat(vform);

	1135 int64_t result[kMaxLanesPerVector];

	1136 const LogicVRegister* src = &src1;

	1137 for (int j = 0; j < 2; j++) {

	1138 for (int i = 0; i < lanes; i += 2) {

	1139 int64_t first_val = src->Int(vform, i);

	1140 int64_t second_val = src->Int(vform, i + 1);

	1141 int64_t dst_val;

	1142 if (max) {

	1143 dst_val = (first_val > second_val) ? first_val : second_val;

	1144 } else {

	1145 dst_val = (first_val < second_val) ? first_val : second_val;

	1146 }

	1147 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);

	1148 result[(i >> 1) + (j * lanes / 2)] = dst_val;

	1149 }

	1150 src = &src2;

	1151 }

	1152 dst.SetIntArray(vform, result);

	1153 return dst;

	1154 }

	1155

	1156 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,

	1157 const LogicVRegister& src1,

	1158 const LogicVRegister& src2) {

	1159 return SMinMaxP(vform, dst, src1, src2, true);

	1160 }

	1161

	1162 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,

	1163 const LogicVRegister& src1,

	1164 const LogicVRegister& src2) {

	1165 return SMinMaxP(vform, dst, src1, src2, false);

	1166 }

	1167

	1168 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,

	1169 const LogicVRegister& src) {

	1170 DCHECK_EQ(vform, kFormatD);

	1171

	1172 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);

	1173 dst.ClearForWrite(vform);

	1174 dst.SetUint(vform, 0, dst_val);

	1175 return dst;

	1176 }

	1177

	1178 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,

	1179 const LogicVRegister& src) {

	1180 VectorFormat vform_dst =

	1181 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));

	1182

	1183 int64_t dst_val = 0;

	1184 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1185 dst_val += src.Int(vform, i);

	1186 }

	1187

	1188 dst.ClearForWrite(vform_dst);

	1189 dst.SetInt(vform_dst, 0, dst_val);

	1190 return dst;

	1191 }

	1192

	1193 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,

	1194 const LogicVRegister& src) {

	1195 VectorFormat vform_dst =

	1196 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

	1197

	1198 int64_t dst_val = 0;

	1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1200 dst_val += src.Int(vform, i);

	1201 }

	1202

	1203 dst.ClearForWrite(vform_dst);

	1204 dst.SetInt(vform_dst, 0, dst_val);

	1205 return dst;

	1206 }

	1207

	1208 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,

	1209 const LogicVRegister& src) {

	1210 VectorFormat vform_dst =

	1211 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

	1212

	1213 uint64_t dst_val = 0;

	1214 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1215 dst_val += src.Uint(vform, i);

	1216 }

	1217

	1218 dst.ClearForWrite(vform_dst);

	1219 dst.SetUint(vform_dst, 0, dst_val);

	1220 return dst;

	1221 }

	1222

	1223 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,

	1224 const LogicVRegister& src, bool max) {

	1225 int64_t dst_val = max ? INT64_MIN : INT64_MAX;

	1226 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1227 int64_t src_val = src.Int(vform, i);

	1228 if (max) {

	1229 dst_val = (src_val > dst_val) ? src_val : dst_val;

	1230 } else {

	1231 dst_val = (src_val < dst_val) ? src_val : dst_val;

	1232 }

	1233 }

	1234 dst.ClearForWrite(ScalarFormatFromFormat(vform));

	1235 dst.SetInt(vform, 0, dst_val);

	1236 return dst;

	1237 }

	1238

	1239 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,

	1240 const LogicVRegister& src) {

	1241 SMinMaxV(vform, dst, src, true);

	1242 return dst;

	1243 }

	1244

	1245 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,

	1246 const LogicVRegister& src) {

	1247 SMinMaxV(vform, dst, src, false);

	1248 return dst;

	1249 }

	1250

	1251 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,

	1252 const LogicVRegister& src1,

	1253 const LogicVRegister& src2, bool max) {

	1254 dst.ClearForWrite(vform);

	1255 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1256 uint64_t src1_val = src1.Uint(vform, i);

	1257 uint64_t src2_val = src2.Uint(vform, i);

	1258 uint64_t dst_val;

	1259 if (max) {

	1260 dst_val = (src1_val > src2_val) ? src1_val : src2_val;

	1261 } else {

	1262 dst_val = (src1_val < src2_val) ? src1_val : src2_val;

	1263 }

	1264 dst.SetUint(vform, i, dst_val);

	1265 }

	1266 return dst;

	1267 }

	1268

	1269 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,

	1270 const LogicVRegister& src1,

	1271 const LogicVRegister& src2) {

	1272 return UMinMax(vform, dst, src1, src2, true);

	1273 }

	1274

	1275 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,

	1276 const LogicVRegister& src1,

	1277 const LogicVRegister& src2) {

	1278 return UMinMax(vform, dst, src1, src2, false);

	1279 }

	1280

	1281 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,

	1282 const LogicVRegister& src1,

	1283 const LogicVRegister& src2, bool max) {

	1284 int lanes = LaneCountFromFormat(vform);

	1285 uint64_t result[kMaxLanesPerVector];

	1286 const LogicVRegister* src = &src1;

	1287 for (int j = 0; j < 2; j++) {

	1288 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {

	1289 uint64_t first_val = src->Uint(vform, i);

	1290 uint64_t second_val = src->Uint(vform, i + 1);

	1291 uint64_t dst_val;

	1292 if (max) {

	1293 dst_val = (first_val > second_val) ? first_val : second_val;

	1294 } else {

	1295 dst_val = (first_val < second_val) ? first_val : second_val;

	1296 }

	1297 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);

	1298 result[(i >> 1) + (j * lanes / 2)] = dst_val;

	1299 }

	1300 src = &src2;

	1301 }

	1302 dst.SetUintArray(vform, result);

	1303 return dst;

	1304 }

	1305

	1306 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,

	1307 const LogicVRegister& src1,

	1308 const LogicVRegister& src2) {

	1309 return UMinMaxP(vform, dst, src1, src2, true);

	1310 }

	1311

	1312 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,

	1313 const LogicVRegister& src1,

	1314 const LogicVRegister& src2) {

	1315 return UMinMaxP(vform, dst, src1, src2, false);

	1316 }

	1317

	1318 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,

	1319 const LogicVRegister& src, bool max) {

	1320 uint64_t dst_val = max ? 0 : UINT64_MAX;

	1321 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1322 uint64_t src_val = src.Uint(vform, i);

	1323 if (max) {

	1324 dst_val = (src_val > dst_val) ? src_val : dst_val;

	1325 } else {

	1326 dst_val = (src_val < dst_val) ? src_val : dst_val;

	1327 }

	1328 }

	1329 dst.ClearForWrite(ScalarFormatFromFormat(vform));

	1330 dst.SetUint(vform, 0, dst_val);

	1331 return dst;

	1332 }

	1333

	1334 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,

	1335 const LogicVRegister& src) {

	1336 UMinMaxV(vform, dst, src, true);

	1337 return dst;

	1338 }

	1339

	1340 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,

	1341 const LogicVRegister& src) {

	1342 UMinMaxV(vform, dst, src, false);

	1343 return dst;

	1344 }

	1345

	1346 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,

	1347 const LogicVRegister& src, int shift) {

	1348 DCHECK_GE(shift, 0);

	1349 SimVRegister temp;

	1350 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1351 return ushl(vform, dst, src, shiftreg);

	1352 }

	1353

	1354 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,

	1355 const LogicVRegister& src, int shift) {

	1356 DCHECK_GE(shift, 0);

	1357 SimVRegister temp1, temp2;

	1358 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1359 LogicVRegister extendedreg = sxtl(vform, temp2, src);

	1360 return sshl(vform, dst, extendedreg, shiftreg);

	1361 }

	1362

	1363 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,

	1364 const LogicVRegister& src, int shift) {

	1365 DCHECK_GE(shift, 0);

	1366 SimVRegister temp1, temp2;

	1367 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1368 LogicVRegister extendedreg = sxtl2(vform, temp2, src);

	1369 return sshl(vform, dst, extendedreg, shiftreg);

	1370 }

	1371

	1372 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,

	1373 const LogicVRegister& src) {

	1374 int shift = LaneSizeInBitsFromFormat(vform) / 2;

	1375 return sshll(vform, dst, src, shift);

	1376 }

	1377

	1378 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,

	1379 const LogicVRegister& src) {

	1380 int shift = LaneSizeInBitsFromFormat(vform) / 2;

	1381 return sshll2(vform, dst, src, shift);

	1382 }

	1383

	1384 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,

	1385 const LogicVRegister& src, int shift) {

	1386 DCHECK_GE(shift, 0);

	1387 SimVRegister temp1, temp2;

	1388 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1389 LogicVRegister extendedreg = uxtl(vform, temp2, src);

	1390 return ushl(vform, dst, extendedreg, shiftreg);

	1391 }

	1392

	1393 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,

	1394 const LogicVRegister& src, int shift) {

	1395 DCHECK_GE(shift, 0);

	1396 SimVRegister temp1, temp2;

	1397 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1398 LogicVRegister extendedreg = uxtl2(vform, temp2, src);

	1399 return ushl(vform, dst, extendedreg, shiftreg);

	1400 }

	1401

	1402 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,

	1403 const LogicVRegister& src, int shift) {

	1404 dst.ClearForWrite(vform);

	1405 int laneCount = LaneCountFromFormat(vform);

	1406 for (int i = 0; i < laneCount; i++) {

	1407 uint64_t src_lane = src.Uint(vform, i);

	1408 uint64_t dst_lane = dst.Uint(vform, i);

	1409 uint64_t shifted = src_lane << shift;

	1410 uint64_t mask = MaxUintFromFormat(vform) << shift;

	1411 dst.SetUint(vform, i, (dst_lane & ~mask) \| shifted);

	1412 }

	1413 return dst;

	1414 }

	1415

	1416 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,

	1417 const LogicVRegister& src, int shift) {

	1418 DCHECK_GE(shift, 0);

	1419 SimVRegister temp;

	1420 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1421 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);

	1422 }

	1423

	1424 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,

	1425 const LogicVRegister& src, int shift) {

	1426 DCHECK_GE(shift, 0);

	1427 SimVRegister temp;

	1428 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1429 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);

	1430 }

	1431

	1432 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,

	1433 const LogicVRegister& src, int shift) {

	1434 DCHECK_GE(shift, 0);

	1435 SimVRegister temp;

	1436 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1437 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);

	1438 }

	1439

	1440 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,

	1441 const LogicVRegister& src, int shift) {

	1442 dst.ClearForWrite(vform);

	1443 int laneCount = LaneCountFromFormat(vform);

	1444 DCHECK((shift > 0) &&

	1445 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));

	1446 for (int i = 0; i < laneCount; i++) {

	1447 uint64_t src_lane = src.Uint(vform, i);

	1448 uint64_t dst_lane = dst.Uint(vform, i);

	1449 uint64_t shifted;

	1450 uint64_t mask;

	1451 if (shift == 64) {

	1452 shifted = 0;

	1453 mask = 0;

	1454 } else {

	1455 shifted = src_lane >> shift;

	1456 mask = MaxUintFromFormat(vform) >> shift;

	1457 }

	1458 dst.SetUint(vform, i, (dst_lane & ~mask) \| shifted);

	1459 }

	1460 return dst;

	1461 }

	1462

	1463 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,

	1464 const LogicVRegister& src, int shift) {

	1465 DCHECK_GE(shift, 0);

	1466 SimVRegister temp;

	1467 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);

	1468 return ushl(vform, dst, src, shiftreg);

	1469 }

	1470

	1471 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,

	1472 const LogicVRegister& src, int shift) {

	1473 DCHECK_GE(shift, 0);

	1474 SimVRegister temp;

	1475 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);

	1476 return sshl(vform, dst, src, shiftreg);

	1477 }

	1478

	1479 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,

	1480 const LogicVRegister& src, int shift) {

	1481 SimVRegister temp;

	1482 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);

	1483 return add(vform, dst, dst, shifted_reg);

	1484 }

	1485

	1486 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,

	1487 const LogicVRegister& src, int shift) {

	1488 SimVRegister temp;

	1489 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);

	1490 return add(vform, dst, dst, shifted_reg);

	1491 }

	1492

	1493 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,

	1494 const LogicVRegister& src, int shift) {

	1495 SimVRegister temp;

	1496 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);

	1497 return add(vform, dst, dst, shifted_reg);

	1498 }

	1499

	1500 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,

	1501 const LogicVRegister& src, int shift) {

	1502 SimVRegister temp;

	1503 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);

	1504 return add(vform, dst, dst, shifted_reg);

	1505 }

	1506

	1507 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,

	1508 const LogicVRegister& src) {

	1509 uint64_t result[16];

	1510 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1511 int laneCount = LaneCountFromFormat(vform);

	1512 for (int i = 0; i < laneCount; i++) {

	1513 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);

	1514 }

	1515

	1516 dst.SetUintArray(vform, result);

	1517 return dst;

	1518 }

	1519

	1520 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,

	1521 const LogicVRegister& src) {

	1522 uint64_t result[16];

	1523 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1524 int laneCount = LaneCountFromFormat(vform);

	1525 for (int i = 0; i < laneCount; i++) {

	1526 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);

	1527 }

	1528

	1529 dst.SetUintArray(vform, result);

	1530 return dst;

	1531 }

	1532

	1533 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,

	1534 const LogicVRegister& src) {

	1535 uint64_t result[16];

	1536 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1537 int laneCount = LaneCountFromFormat(vform);

	1538 for (int i = 0; i < laneCount; i++) {

	1539 uint64_t value = src.Uint(vform, i);

	1540 result[i] = 0;

	1541 for (int j = 0; j < laneSizeInBits; j++) {

	1542 result[i] += (value & 1);

	1543 value >>= 1;

	1544 }

	1545 }

	1546

	1547 dst.SetUintArray(vform, result);

	1548 return dst;

	1549 }

	1550

	1551 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,

	1552 const LogicVRegister& src1,

	1553 const LogicVRegister& src2) {

	1554 dst.ClearForWrite(vform);

	1555 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1556 int8_t shift_val = src2.Int(vform, i);

	1557 int64_t lj_src_val = src1.IntLeftJustified(vform, i);

	1558

	1559 // Set signed saturation state.

	1560 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&

	1561 (lj_src_val != 0)) {

	1562 dst.SetSignedSat(i, lj_src_val >= 0);

	1563 }

	1564

	1565 // Set unsigned saturation state.

	1566 if (lj_src_val < 0) {

	1567 dst.SetUnsignedSat(i, false);

	1568 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&

	1569 (lj_src_val != 0)) {

	1570 dst.SetUnsignedSat(i, true);

	1571 }

	1572

	1573 int64_t src_val = src1.Int(vform, i);

	1574 bool src_is_negative = src_val < 0;

	1575 if (shift_val > 63) {

	1576 dst.SetInt(vform, i, 0);

	1577 } else if (shift_val < -63) {

	1578 dst.SetRounding(i, src_is_negative);

	1579 dst.SetInt(vform, i, src_is_negative ? -1 : 0);

	1580 } else {

	1581 // Use unsigned types for shifts, as behaviour is undefined for signed

	1582 // lhs.

	1583 uint64_t usrc_val = static_cast<uint64_t>(src_val);

	1584

	1585 if (shift_val < 0) {

	1586 // Convert to right shift.

	1587 shift_val = -shift_val;

	1588

	1589 // Set rounding state by testing most-significant bit shifted out.

	1590 // Rounding only needed on right shifts.

	1591 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {

	1592 dst.SetRounding(i, true);

	1593 }

	1594

	1595 usrc_val >>= shift_val;

	1596

	1597 if (src_is_negative) {

	1598 // Simulate sign-extension.

	1599 usrc_val \|= (~UINT64_C(0) << (64 - shift_val));

	1600 }

	1601 } else {

	1602 usrc_val <<= shift_val;

	1603 }

	1604 dst.SetUint(vform, i, usrc_val);

	1605 }

	1606 }

	1607 return dst;

	1608 }

	1609

	1610 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,

	1611 const LogicVRegister& src1,

	1612 const LogicVRegister& src2) {

	1613 dst.ClearForWrite(vform);

	1614 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1615 int8_t shift_val = src2.Int(vform, i);

	1616 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);

	1617

	1618 // Set saturation state.

	1619 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {

	1620 dst.SetUnsignedSat(i, true);

	1621 }

	1622

	1623 uint64_t src_val = src1.Uint(vform, i);

	1624 if ((shift_val > 63) \|\| (shift_val < -64)) {

	1625 dst.SetUint(vform, i, 0);

	1626 } else {

	1627 if (shift_val < 0) {

	1628 // Set rounding state. Rounding only needed on right shifts.

	1629 if (((src_val >> (-shift_val - 1)) & 1) == 1) {

	1630 dst.SetRounding(i, true);

	1631 }

	1632

	1633 if (shift_val == -64) {

	1634 src_val = 0;

	1635 } else {

	1636 src_val >>= -shift_val;

	1637 }

	1638 } else {

	1639 src_val <<= shift_val;

	1640 }

	1641 dst.SetUint(vform, i, src_val);

	1642 }

	1643 }

	1644 return dst;

	1645 }

	1646

	1647 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,

	1648 const LogicVRegister& src) {

	1649 dst.ClearForWrite(vform);

	1650 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1651 // Test for signed saturation.

	1652 int64_t sa = src.Int(vform, i);

	1653 if (sa == MinIntFromFormat(vform)) {

	1654 dst.SetSignedSat(i, true);

	1655 }

	1656 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);

	1657 }

	1658 return dst;

	1659 }

	1660

	1661 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,

	1662 const LogicVRegister& src) {

	1663 dst.ClearForWrite(vform);

	1664 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1665 int64_t sa = dst.IntLeftJustified(vform, i);

	1666 uint64_t ub = src.UintLeftJustified(vform, i);

	1667 uint64_t ur = sa + ub;

	1668

	1669 int64_t sr = bit_cast<int64_t>(ur);

	1670 if (sr < sa) { // Test for signed positive saturation.

	1671 dst.SetInt(vform, i, MaxIntFromFormat(vform));

	1672 } else {

	1673 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));

	1674 }

	1675 }

	1676 return dst;

	1677 }

	1678

	1679 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,

	1680 const LogicVRegister& src) {

	1681 dst.ClearForWrite(vform);

	1682 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1683 uint64_t ua = dst.UintLeftJustified(vform, i);

	1684 int64_t sb = src.IntLeftJustified(vform, i);

	1685 uint64_t ur = ua + sb;

	1686

	1687 if ((sb > 0) && (ur <= ua)) {

	1688 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.

	1689 } else if ((sb < 0) && (ur >= ua)) {

	1690 dst.SetUint(vform, i, 0); // Negative saturation.

	1691 } else {

	1692 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));

	1693 }

	1694 }

	1695 return dst;

	1696 }

	1697

	1698 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,

	1699 const LogicVRegister& src) {

	1700 dst.ClearForWrite(vform);

	1701 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1702 // Test for signed saturation.

	1703 int64_t sa = src.Int(vform, i);

	1704 if (sa == MinIntFromFormat(vform)) {

	1705 dst.SetSignedSat(i, true);

	1706 }

	1707 if (sa < 0) {

	1708 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);

	1709 } else {

	1710 dst.SetInt(vform, i, sa);

	1711 }

	1712 }

	1713 return dst;

	1714 }

	1715

	1716 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,

	1717 LogicVRegister dst, bool dstIsSigned,

	1718 const LogicVRegister& src,

	1719 bool srcIsSigned) {

	1720 bool upperhalf = false;

	1721 VectorFormat srcform = kFormatUndefined;

	1722 int64_t ssrc[8];

	1723 uint64_t usrc[8];

	1724

	1725 switch (dstform) {

	1726 case kFormat8B:

	1727 upperhalf = false;

	1728 srcform = kFormat8H;

	1729 break;

	1730 case kFormat16B:

	1731 upperhalf = true;

	1732 srcform = kFormat8H;

	1733 break;

	1734 case kFormat4H:

	1735 upperhalf = false;

	1736 srcform = kFormat4S;

	1737 break;

	1738 case kFormat8H:

	1739 upperhalf = true;

	1740 srcform = kFormat4S;

	1741 break;

	1742 case kFormat2S:

	1743 upperhalf = false;

	1744 srcform = kFormat2D;

	1745 break;

	1746 case kFormat4S:

	1747 upperhalf = true;

	1748 srcform = kFormat2D;

	1749 break;

	1750 case kFormatB:

	1751 upperhalf = false;

	1752 srcform = kFormatH;

	1753 break;

	1754 case kFormatH:

	1755 upperhalf = false;

	1756 srcform = kFormatS;

	1757 break;

	1758 case kFormatS:

	1759 upperhalf = false;

	1760 srcform = kFormatD;

	1761 break;

	1762 default:

	1763 UNIMPLEMENTED();

	1764 }

	1765

	1766 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {

	1767 ssrc[i] = src.Int(srcform, i);

	1768 usrc[i] = src.Uint(srcform, i);

	1769 }

	1770

	1771 int offset;

	1772 if (upperhalf) {

	1773 offset = LaneCountFromFormat(dstform) / 2;

	1774 } else {

	1775 offset = 0;

	1776 dst.ClearForWrite(dstform);

	1777 }

	1778

	1779 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {

	1780 // Test for signed saturation

	1781 if (ssrc[i] > MaxIntFromFormat(dstform)) {

	1782 dst.SetSignedSat(offset + i, true);

	1783 } else if (ssrc[i] < MinIntFromFormat(dstform)) {

	1784 dst.SetSignedSat(offset + i, false);

	1785 }

	1786

	1787 // Test for unsigned saturation

	1788 if (srcIsSigned) {

	1789 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {

	1790 dst.SetUnsignedSat(offset + i, true);

	1791 } else if (ssrc[i] < 0) {

	1792 dst.SetUnsignedSat(offset + i, false);

	1793 }

	1794 } else {

	1795 if (usrc[i] > MaxUintFromFormat(dstform)) {

	1796 dst.SetUnsignedSat(offset + i, true);

	1797 }

	1798 }

	1799

	1800 int64_t result;

	1801 if (srcIsSigned) {

	1802 result = ssrc[i] & MaxUintFromFormat(dstform);

	1803 } else {

	1804 result = usrc[i] & MaxUintFromFormat(dstform);

	1805 }

	1806

	1807 if (dstIsSigned) {

	1808 dst.SetInt(dstform, offset + i, result);

	1809 } else {

	1810 dst.SetUint(dstform, offset + i, result);

	1811 }

	1812 }

	1813 return dst;

	1814 }

	1815

	1816 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,

	1817 const LogicVRegister& src) {

	1818 return ExtractNarrow(vform, dst, true, src, true);

	1819 }

	1820

	1821 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,

	1822 const LogicVRegister& src) {

	1823 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);

	1824 }

	1825

	1826 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,

	1827 const LogicVRegister& src) {

	1828 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);

	1829 }

	1830

	1831 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,

	1832 const LogicVRegister& src) {

	1833 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);

	1834 }

	1835

	1836 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,

	1837 const LogicVRegister& src1,

	1838 const LogicVRegister& src2, bool issigned) {

	1839 dst.ClearForWrite(vform);

	1840 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1841 if (issigned) {

	1842 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);

	1843 sr = sr > 0 ? sr : -sr;

	1844 dst.SetInt(vform, i, sr);

	1845 } else {

	1846 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);

	1847 sr = sr > 0 ? sr : -sr;

	1848 dst.SetUint(vform, i, sr);

	1849 }

	1850 }

	1851 return dst;

	1852 }

	1853

	1854 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,

	1855 const LogicVRegister& src1,

	1856 const LogicVRegister& src2) {

	1857 SimVRegister temp;

	1858 dst.ClearForWrite(vform);

	1859 AbsDiff(vform, temp, src1, src2, true);

	1860 add(vform, dst, dst, temp);

	1861 return dst;

	1862 }

	1863

	1864 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,

	1865 const LogicVRegister& src1,

	1866 const LogicVRegister& src2) {

	1867 SimVRegister temp;

	1868 dst.ClearForWrite(vform);

	1869 AbsDiff(vform, temp, src1, src2, false);

	1870 add(vform, dst, dst, temp);

	1871 return dst;

	1872 }

	1873

	1874 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,

	1875 const LogicVRegister& src) {

	1876 dst.ClearForWrite(vform);

	1877 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1878 dst.SetUint(vform, i, ~src.Uint(vform, i));

	1879 }

	1880 return dst;

	1881 }

	1882

	1883 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,

	1884 const LogicVRegister& src) {

	1885 uint64_t result[16];

	1886 int laneCount = LaneCountFromFormat(vform);

	1887 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1888 uint64_t reversed_value;

	1889 uint64_t value;

	1890 for (int i = 0; i < laneCount; i++) {

	1891 value = src.Uint(vform, i);

	1892 reversed_value = 0;

	1893 for (int j = 0; j < laneSizeInBits; j++) {

	1894 reversed_value = (reversed_value << 1) \| (value & 1);

	1895 value >>= 1;

	1896 }

	1897 result[i] = reversed_value;

	1898 }

	1899

	1900 dst.SetUintArray(vform, result);

	1901 return dst;

	1902 }

	1903

	1904 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,

	1905 const LogicVRegister& src, int revSize) {

	1906 uint64_t result[16];

	1907 int laneCount = LaneCountFromFormat(vform);

	1908 int laneSize = LaneSizeInBytesFromFormat(vform);

	1909 int lanesPerLoop = revSize / laneSize;

	1910 for (int i = 0; i < laneCount; i += lanesPerLoop) {

	1911 for (int j = 0; j < lanesPerLoop; j++) {

	1912 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);

	1913 }

	1914 }

	1915 dst.SetUintArray(vform, result);

	1916 return dst;

	1917 }

	1918

	1919 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,

	1920 const LogicVRegister& src) {

	1921 return rev(vform, dst, src, 2);

	1922 }

	1923

	1924 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,

	1925 const LogicVRegister& src) {

	1926 return rev(vform, dst, src, 4);

	1927 }

	1928

	1929 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,

	1930 const LogicVRegister& src) {

	1931 return rev(vform, dst, src, 8);

	1932 }

	1933

	1934 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,

	1935 const LogicVRegister& src, bool is_signed,

	1936 bool do_accumulate) {

	1937 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);

	1938 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);

	1939 DCHECK_LE(LaneCountFromFormat(vform), 8);

	1940

	1941 uint64_t result[8];

	1942 int lane_count = LaneCountFromFormat(vform);

	1943 for (int i = 0; i < lane_count; i++) {

	1944 if (is_signed) {

	1945 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +

	1946 src.Int(vformsrc, 2 * i + 1));

	1947 } else {

	1948 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);

	1949 }

	1950 }

	1951

	1952 dst.ClearForWrite(vform);

	1953 for (int i = 0; i < lane_count; ++i) {

	1954 if (do_accumulate) {

	1955 result[i] += dst.Uint(vform, i);

	1956 }

	1957 dst.SetUint(vform, i, result[i]);

	1958 }

	1959

	1960 return dst;

	1961 }

	1962

	1963 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,

	1964 const LogicVRegister& src) {

	1965 return addlp(vform, dst, src, true, false);

	1966 }

	1967

	1968 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,

	1969 const LogicVRegister& src) {

	1970 return addlp(vform, dst, src, false, false);

	1971 }

	1972

	1973 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,

	1974 const LogicVRegister& src) {

	1975 return addlp(vform, dst, src, true, true);

	1976 }

	1977

	1978 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,

	1979 const LogicVRegister& src) {

	1980 return addlp(vform, dst, src, false, true);

	1981 }

	1982

	1983 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,

	1984 const LogicVRegister& src1,

	1985 const LogicVRegister& src2, int index) {

	1986 uint8_t result[16];

	1987 int laneCount = LaneCountFromFormat(vform);

	1988 for (int i = 0; i < laneCount - index; ++i) {

	1989 result[i] = src1.Uint(vform, i + index);

	1990 }

	1991 for (int i = 0; i < index; ++i) {

	1992 result[laneCount - index + i] = src2.Uint(vform, i);

	1993 }

	1994 dst.ClearForWrite(vform);

	1995 for (int i = 0; i < laneCount; ++i) {

	1996 dst.SetUint(vform, i, result[i]);

	1997 }

	1998 return dst;

	1999 }

	2000

	2001 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,

	2002 const LogicVRegister& src,

	2003 int src_index) {

	2004 int laneCount = LaneCountFromFormat(vform);

	2005 uint64_t value = src.Uint(vform, src_index);

	2006 dst.ClearForWrite(vform);

	2007 for (int i = 0; i < laneCount; ++i) {

	2008 dst.SetUint(vform, i, value);

	2009 }

	2010 return dst;

	2011 }

	2012

	2013 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,

	2014 uint64_t imm) {

	2015 int laneCount = LaneCountFromFormat(vform);

	2016 uint64_t value = imm & MaxUintFromFormat(vform);

	2017 dst.ClearForWrite(vform);

	2018 for (int i = 0; i < laneCount; ++i) {

	2019 dst.SetUint(vform, i, value);

	2020 }

	2021 return dst;

	2022 }

	2023

	2024 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,

	2025 int dst_index, const LogicVRegister& src,

	2026 int src_index) {

	2027 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));

	2028 return dst;

	2029 }

	2030

	2031 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,

	2032 int dst_index, uint64_t imm) {

	2033 uint64_t value = imm & MaxUintFromFormat(vform);

	2034 dst.SetUint(vform, dst_index, value);

	2035 return dst;

	2036 }

	2037

	2038 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,

	2039 uint64_t imm) {

	2040 int laneCount = LaneCountFromFormat(vform);

	2041 dst.ClearForWrite(vform);

	2042 for (int i = 0; i < laneCount; ++i) {

	2043 dst.SetUint(vform, i, imm);

	2044 }

	2045 return dst;

	2046 }

	2047

	2048 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,

	2049 uint64_t imm) {

	2050 int laneCount = LaneCountFromFormat(vform);

	2051 dst.ClearForWrite(vform);

	2052 for (int i = 0; i < laneCount; ++i) {

	2053 dst.SetUint(vform, i, ~imm);

	2054 }

	2055 return dst;

	2056 }

	2057

	2058 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,

	2059 const LogicVRegister& src, uint64_t imm) {

	2060 uint64_t result[16];

	2061 int laneCount = LaneCountFromFormat(vform);

	2062 for (int i = 0; i < laneCount; ++i) {

	2063 result[i] = src.Uint(vform, i) \| imm;

	2064 }

	2065 dst.SetUintArray(vform, result);

	2066 return dst;

	2067 }

	2068

	2069 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,

	2070 const LogicVRegister& src) {

	2071 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2072

	2073 dst.ClearForWrite(vform);

	2074 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2075 dst.SetUint(vform, i, src.Uint(vform_half, i));

	2076 }

	2077 return dst;

	2078 }

	2079

	2080 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,

	2081 const LogicVRegister& src) {

	2082 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2083

	2084 dst.ClearForWrite(vform);

	2085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2086 dst.SetInt(vform, i, src.Int(vform_half, i));

	2087 }

	2088 return dst;

	2089 }

	2090

	2091 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,

	2092 const LogicVRegister& src) {

	2093 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2094 int lane_count = LaneCountFromFormat(vform);

	2095

	2096 dst.ClearForWrite(vform);

	2097 for (int i = 0; i < lane_count; i++) {

	2098 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));

	2099 }

	2100 return dst;

	2101 }

	2102

	2103 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,

	2104 const LogicVRegister& src) {

	2105 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2106 int lane_count = LaneCountFromFormat(vform);

	2107

	2108 dst.ClearForWrite(vform);

	2109 for (int i = 0; i < lane_count; i++) {

	2110 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));

	2111 }

	2112 return dst;

	2113 }

	2114

	2115 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,

	2116 const LogicVRegister& src, int shift) {

	2117 SimVRegister temp;

	2118 VectorFormat vform_src = VectorFormatDoubleWidth(vform);

	2119 VectorFormat vform_dst = vform;

	2120 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);

	2121 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);

	2122 }

	2123

	2124 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,

	2125 const LogicVRegister& src, int shift) {

	2126 SimVRegister temp;

	2127 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2128 VectorFormat vformdst = vform;

	2129 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);

	2130 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2131 }

	2132

	2133 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,

	2134 const LogicVRegister& src, int shift) {

	2135 SimVRegister temp;

	2136 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2137 VectorFormat vformdst = vform;

	2138 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);

	2139 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2140 }

	2141

	2142 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,

	2143 const LogicVRegister& src, int shift) {

	2144 SimVRegister temp;

	2145 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2146 VectorFormat vformdst = vform;

	2147 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);

	2148 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2149 }

	2150

	2151 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,

	2152 const LogicVRegister& ind,

	2153 bool zero_out_of_bounds,

	2154 const LogicVRegister* tab1,

	2155 const LogicVRegister* tab2,

	2156 const LogicVRegister* tab3,

	2157 const LogicVRegister* tab4) {

	2158 DCHECK_NOT_NULL(tab1);

	2159 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};

	2160 uint64_t result[kMaxLanesPerVector];

	2161 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2162 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);

	2163 }

	2164 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2165 uint64_t j = ind.Uint(vform, i);

	2166 int tab_idx = static_cast<int>(j >> 4);

	2167 int j_idx = static_cast<int>(j & 15);

	2168 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {

	2169 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);

	2170 }

	2171 }

	2172 dst.SetUintArray(vform, result);

	2173 return dst;

	2174 }

	2175

	2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2177 const LogicVRegister& tab,

	2178 const LogicVRegister& ind) {

	2179 return Table(vform, dst, ind, true, &tab);

	2180 }

	2181

	2182 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2183 const LogicVRegister& tab,

	2184 const LogicVRegister& tab2,

	2185 const LogicVRegister& ind) {

	2186 return Table(vform, dst, ind, true, &tab, &tab2);

	2187 }

	2188

	2189 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2190 const LogicVRegister& tab,

	2191 const LogicVRegister& tab2,

	2192 const LogicVRegister& tab3,

	2193 const LogicVRegister& ind) {

	2194 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);

	2195 }

	2196

	2197 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2198 const LogicVRegister& tab,

	2199 const LogicVRegister& tab2,

	2200 const LogicVRegister& tab3,

	2201 const LogicVRegister& tab4,

	2202 const LogicVRegister& ind) {

	2203 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);

	2204 }

	2205

	2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2207 const LogicVRegister& tab,

	2208 const LogicVRegister& ind) {

	2209 return Table(vform, dst, ind, false, &tab);

	2210 }

	2211

	2212 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2213 const LogicVRegister& tab,

	2214 const LogicVRegister& tab2,

	2215 const LogicVRegister& ind) {

	2216 return Table(vform, dst, ind, false, &tab, &tab2);

	2217 }

	2218

	2219 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2220 const LogicVRegister& tab,

	2221 const LogicVRegister& tab2,

	2222 const LogicVRegister& tab3,

	2223 const LogicVRegister& ind) {

	2224 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);

	2225 }

	2226

	2227 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2228 const LogicVRegister& tab,

	2229 const LogicVRegister& tab2,

	2230 const LogicVRegister& tab3,

	2231 const LogicVRegister& tab4,

	2232 const LogicVRegister& ind) {

	2233 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);

	2234 }

	2235

	2236 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,

	2237 const LogicVRegister& src, int shift) {

	2238 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);

	2239 }

	2240

	2241 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,

	2242 const LogicVRegister& src, int shift) {

	2243 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);

	2244 }

	2245

	2246 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,

	2247 const LogicVRegister& src, int shift) {

	2248 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);

	2249 }

	2250

	2251 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,

	2252 const LogicVRegister& src, int shift) {

	2253 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);

	2254 }

	2255

	2256 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,

	2257 const LogicVRegister& src, int shift) {

	2258 SimVRegister temp;

	2259 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2260 VectorFormat vformdst = vform;

	2261 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2262 return sqxtn(vformdst, dst, shifted_src);

	2263 }

	2264

	2265 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,

	2266 const LogicVRegister& src, int shift) {

	2267 SimVRegister temp;

	2268 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2269 VectorFormat vformdst = vform;

	2270 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2271 return sqxtn(vformdst, dst, shifted_src);

	2272 }

	2273

	2274 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,

	2275 const LogicVRegister& src, int shift) {

	2276 SimVRegister temp;

	2277 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2278 VectorFormat vformdst = vform;

	2279 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2280 return sqxtn(vformdst, dst, shifted_src);

	2281 }

	2282

	2283 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,

	2284 const LogicVRegister& src, int shift) {

	2285 SimVRegister temp;

	2286 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2287 VectorFormat vformdst = vform;

	2288 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2289 return sqxtn(vformdst, dst, shifted_src);

	2290 }

	2291

	2292 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,

	2293 const LogicVRegister& src, int shift) {

	2294 SimVRegister temp;

	2295 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2296 VectorFormat vformdst = vform;

	2297 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2298 return sqxtun(vformdst, dst, shifted_src);

	2299 }

	2300

	2301 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,

	2302 const LogicVRegister& src, int shift) {

	2303 SimVRegister temp;

	2304 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2305 VectorFormat vformdst = vform;

	2306 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2307 return sqxtun(vformdst, dst, shifted_src);

	2308 }

	2309

	2310 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,

	2311 const LogicVRegister& src, int shift) {

	2312 SimVRegister temp;

	2313 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2314 VectorFormat vformdst = vform;

	2315 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2316 return sqxtun(vformdst, dst, shifted_src);

	2317 }

	2318

	2319 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,

	2320 const LogicVRegister& src, int shift) {

	2321 SimVRegister temp;

	2322 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2323 VectorFormat vformdst = vform;

	2324 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2325 return sqxtun(vformdst, dst, shifted_src);

	2326 }

	2327

	2328 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,

	2329 const LogicVRegister& src1,

	2330 const LogicVRegister& src2) {

	2331 SimVRegister temp1, temp2;

	2332 uxtl(vform, temp1, src1);

	2333 uxtl(vform, temp2, src2);

	2334 add(vform, dst, temp1, temp2);

	2335 return dst;

	2336 }

	2337

	2338 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,

	2339 const LogicVRegister& src1,

	2340 const LogicVRegister& src2) {

	2341 SimVRegister temp1, temp2;

	2342 uxtl2(vform, temp1, src1);

	2343 uxtl2(vform, temp2, src2);

	2344 add(vform, dst, temp1, temp2);

	2345 return dst;

	2346 }

	2347

	2348 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,

	2349 const LogicVRegister& src1,

	2350 const LogicVRegister& src2) {

	2351 SimVRegister temp;

	2352 uxtl(vform, temp, src2);

	2353 add(vform, dst, src1, temp);

	2354 return dst;

	2355 }

	2356

	2357 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,

	2358 const LogicVRegister& src1,

	2359 const LogicVRegister& src2) {

	2360 SimVRegister temp;

	2361 uxtl2(vform, temp, src2);

	2362 add(vform, dst, src1, temp);

	2363 return dst;

	2364 }

	2365

	2366 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,

	2367 const LogicVRegister& src1,

	2368 const LogicVRegister& src2) {

	2369 SimVRegister temp1, temp2;

	2370 sxtl(vform, temp1, src1);

	2371 sxtl(vform, temp2, src2);

	2372 add(vform, dst, temp1, temp2);

	2373 return dst;

	2374 }

	2375

	2376 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,

	2377 const LogicVRegister& src1,

	2378 const LogicVRegister& src2) {

	2379 SimVRegister temp1, temp2;

	2380 sxtl2(vform, temp1, src1);

	2381 sxtl2(vform, temp2, src2);

	2382 add(vform, dst, temp1, temp2);

	2383 return dst;

	2384 }

	2385

	2386 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,

	2387 const LogicVRegister& src1,

	2388 const LogicVRegister& src2) {

	2389 SimVRegister temp;

	2390 sxtl(vform, temp, src2);

	2391 add(vform, dst, src1, temp);

	2392 return dst;

	2393 }

	2394

	2395 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,

	2396 const LogicVRegister& src1,

	2397 const LogicVRegister& src2) {

	2398 SimVRegister temp;

	2399 sxtl2(vform, temp, src2);

	2400 add(vform, dst, src1, temp);

	2401 return dst;

	2402 }

	2403

	2404 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,

	2405 const LogicVRegister& src1,

	2406 const LogicVRegister& src2) {

	2407 SimVRegister temp1, temp2;

	2408 uxtl(vform, temp1, src1);

	2409 uxtl(vform, temp2, src2);

	2410 sub(vform, dst, temp1, temp2);

	2411 return dst;

	2412 }

	2413

	2414 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,

	2415 const LogicVRegister& src1,

	2416 const LogicVRegister& src2) {

	2417 SimVRegister temp1, temp2;

	2418 uxtl2(vform, temp1, src1);

	2419 uxtl2(vform, temp2, src2);

	2420 sub(vform, dst, temp1, temp2);

	2421 return dst;

	2422 }

	2423

	2424 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,

	2425 const LogicVRegister& src1,

	2426 const LogicVRegister& src2) {

	2427 SimVRegister temp;

	2428 uxtl(vform, temp, src2);

	2429 sub(vform, dst, src1, temp);

	2430 return dst;

	2431 }

	2432

	2433 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,

	2434 const LogicVRegister& src1,

	2435 const LogicVRegister& src2) {

	2436 SimVRegister temp;

	2437 uxtl2(vform, temp, src2);

	2438 sub(vform, dst, src1, temp);

	2439 return dst;

	2440 }

	2441

	2442 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,

	2443 const LogicVRegister& src1,

	2444 const LogicVRegister& src2) {

	2445 SimVRegister temp1, temp2;

	2446 sxtl(vform, temp1, src1);

	2447 sxtl(vform, temp2, src2);

	2448 sub(vform, dst, temp1, temp2);

	2449 return dst;

	2450 }

	2451

	2452 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,

	2453 const LogicVRegister& src1,

	2454 const LogicVRegister& src2) {

	2455 SimVRegister temp1, temp2;

	2456 sxtl2(vform, temp1, src1);

	2457 sxtl2(vform, temp2, src2);

	2458 sub(vform, dst, temp1, temp2);

	2459 return dst;

	2460 }

	2461

	2462 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,

	2463 const LogicVRegister& src1,

	2464 const LogicVRegister& src2) {

	2465 SimVRegister temp;

	2466 sxtl(vform, temp, src2);

	2467 sub(vform, dst, src1, temp);

	2468 return dst;

	2469 }

	2470

	2471 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,

	2472 const LogicVRegister& src1,

	2473 const LogicVRegister& src2) {

	2474 SimVRegister temp;

	2475 sxtl2(vform, temp, src2);

	2476 sub(vform, dst, src1, temp);

	2477 return dst;

	2478 }

	2479

	2480 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,

	2481 const LogicVRegister& src1,

	2482 const LogicVRegister& src2) {

	2483 SimVRegister temp1, temp2;

	2484 uxtl(vform, temp1, src1);

	2485 uxtl(vform, temp2, src2);

	2486 uaba(vform, dst, temp1, temp2);

	2487 return dst;

	2488 }

	2489

	2490 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,

	2491 const LogicVRegister& src1,

	2492 const LogicVRegister& src2) {

	2493 SimVRegister temp1, temp2;

	2494 uxtl2(vform, temp1, src1);

	2495 uxtl2(vform, temp2, src2);

	2496 uaba(vform, dst, temp1, temp2);

	2497 return dst;

	2498 }

	2499

	2500 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,

	2501 const LogicVRegister& src1,

	2502 const LogicVRegister& src2) {

	2503 SimVRegister temp1, temp2;

	2504 sxtl(vform, temp1, src1);

	2505 sxtl(vform, temp2, src2);

	2506 saba(vform, dst, temp1, temp2);

	2507 return dst;

	2508 }

	2509

	2510 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,

	2511 const LogicVRegister& src1,

	2512 const LogicVRegister& src2) {

	2513 SimVRegister temp1, temp2;

	2514 sxtl2(vform, temp1, src1);

	2515 sxtl2(vform, temp2, src2);

	2516 saba(vform, dst, temp1, temp2);

	2517 return dst;

	2518 }

	2519

	2520 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,

	2521 const LogicVRegister& src1,

	2522 const LogicVRegister& src2) {

	2523 SimVRegister temp1, temp2;

	2524 uxtl(vform, temp1, src1);

	2525 uxtl(vform, temp2, src2);

	2526 AbsDiff(vform, dst, temp1, temp2, false);

	2527 return dst;

	2528 }

	2529

	2530 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,

	2531 const LogicVRegister& src1,

	2532 const LogicVRegister& src2) {

	2533 SimVRegister temp1, temp2;

	2534 uxtl2(vform, temp1, src1);

	2535 uxtl2(vform, temp2, src2);

	2536 AbsDiff(vform, dst, temp1, temp2, false);

	2537 return dst;

	2538 }

	2539

	2540 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,

	2541 const LogicVRegister& src1,

	2542 const LogicVRegister& src2) {

	2543 SimVRegister temp1, temp2;

	2544 sxtl(vform, temp1, src1);

	2545 sxtl(vform, temp2, src2);

	2546 AbsDiff(vform, dst, temp1, temp2, true);

	2547 return dst;

	2548 }

	2549

	2550 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,

	2551 const LogicVRegister& src1,

	2552 const LogicVRegister& src2) {

	2553 SimVRegister temp1, temp2;

	2554 sxtl2(vform, temp1, src1);

	2555 sxtl2(vform, temp2, src2);

	2556 AbsDiff(vform, dst, temp1, temp2, true);

	2557 return dst;

	2558 }

	2559

	2560 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,

	2561 const LogicVRegister& src1,

	2562 const LogicVRegister& src2) {

	2563 SimVRegister temp1, temp2;

	2564 uxtl(vform, temp1, src1);

	2565 uxtl(vform, temp2, src2);

	2566 mul(vform, dst, temp1, temp2);

	2567 return dst;

	2568 }

	2569

	2570 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,

	2571 const LogicVRegister& src1,

	2572 const LogicVRegister& src2) {

	2573 SimVRegister temp1, temp2;

	2574 uxtl2(vform, temp1, src1);

	2575 uxtl2(vform, temp2, src2);

	2576 mul(vform, dst, temp1, temp2);

	2577 return dst;

	2578 }

	2579

	2580 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,

	2581 const LogicVRegister& src1,

	2582 const LogicVRegister& src2) {

	2583 SimVRegister temp1, temp2;

	2584 sxtl(vform, temp1, src1);

	2585 sxtl(vform, temp2, src2);

	2586 mul(vform, dst, temp1, temp2);

	2587 return dst;

	2588 }

	2589

	2590 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,

	2591 const LogicVRegister& src1,

	2592 const LogicVRegister& src2) {

	2593 SimVRegister temp1, temp2;

	2594 sxtl2(vform, temp1, src1);

	2595 sxtl2(vform, temp2, src2);

	2596 mul(vform, dst, temp1, temp2);

	2597 return dst;

	2598 }

	2599

	2600 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,

	2601 const LogicVRegister& src1,

	2602 const LogicVRegister& src2) {

	2603 SimVRegister temp1, temp2;

	2604 uxtl(vform, temp1, src1);

	2605 uxtl(vform, temp2, src2);

	2606 mls(vform, dst, temp1, temp2);

	2607 return dst;

	2608 }

	2609

	2610 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,

	2611 const LogicVRegister& src1,

	2612 const LogicVRegister& src2) {

	2613 SimVRegister temp1, temp2;

	2614 uxtl2(vform, temp1, src1);

	2615 uxtl2(vform, temp2, src2);

	2616 mls(vform, dst, temp1, temp2);

	2617 return dst;

	2618 }

	2619

	2620 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,

	2621 const LogicVRegister& src1,

	2622 const LogicVRegister& src2) {

	2623 SimVRegister temp1, temp2;

	2624 sxtl(vform, temp1, src1);

	2625 sxtl(vform, temp2, src2);

	2626 mls(vform, dst, temp1, temp2);

	2627 return dst;

	2628 }

	2629

	2630 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,

	2631 const LogicVRegister& src1,

	2632 const LogicVRegister& src2) {

	2633 SimVRegister temp1, temp2;

	2634 sxtl2(vform, temp1, src1);

	2635 sxtl2(vform, temp2, src2);

	2636 mls(vform, dst, temp1, temp2);

	2637 return dst;

	2638 }

	2639

	2640 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,

	2641 const LogicVRegister& src1,

	2642 const LogicVRegister& src2) {

	2643 SimVRegister temp1, temp2;

	2644 uxtl(vform, temp1, src1);

	2645 uxtl(vform, temp2, src2);

	2646 mla(vform, dst, temp1, temp2);

	2647 return dst;

	2648 }

	2649

	2650 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,

	2651 const LogicVRegister& src1,

	2652 const LogicVRegister& src2) {

	2653 SimVRegister temp1, temp2;

	2654 uxtl2(vform, temp1, src1);

	2655 uxtl2(vform, temp2, src2);

	2656 mla(vform, dst, temp1, temp2);

	2657 return dst;

	2658 }

	2659

	2660 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,

	2661 const LogicVRegister& src1,

	2662 const LogicVRegister& src2) {

	2663 SimVRegister temp1, temp2;

	2664 sxtl(vform, temp1, src1);

	2665 sxtl(vform, temp2, src2);

	2666 mla(vform, dst, temp1, temp2);

	2667 return dst;

	2668 }

	2669

	2670 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,

	2671 const LogicVRegister& src1,

	2672 const LogicVRegister& src2) {

	2673 SimVRegister temp1, temp2;

	2674 sxtl2(vform, temp1, src1);

	2675 sxtl2(vform, temp2, src2);

	2676 mla(vform, dst, temp1, temp2);

	2677 return dst;

	2678 }

	2679

	2680 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,

	2681 const LogicVRegister& src1,

	2682 const LogicVRegister& src2) {

	2683 SimVRegister temp;

	2684 LogicVRegister product = sqdmull(vform, temp, src1, src2);

	2685 return add(vform, dst, dst, product).SignedSaturate(vform);

	2686 }

	2687

	2688 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,

	2689 const LogicVRegister& src1,

	2690 const LogicVRegister& src2) {

	2691 SimVRegister temp;

	2692 LogicVRegister product = sqdmull2(vform, temp, src1, src2);

	2693 return add(vform, dst, dst, product).SignedSaturate(vform);

	2694 }

	2695

	2696 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,

	2697 const LogicVRegister& src1,

	2698 const LogicVRegister& src2) {

	2699 SimVRegister temp;

	2700 LogicVRegister product = sqdmull(vform, temp, src1, src2);

	2701 return sub(vform, dst, dst, product).SignedSaturate(vform);

	2702 }

	2703

	2704 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,

	2705 const LogicVRegister& src1,

	2706 const LogicVRegister& src2) {

	2707 SimVRegister temp;

	2708 LogicVRegister product = sqdmull2(vform, temp, src1, src2);

	2709 return sub(vform, dst, dst, product).SignedSaturate(vform);

	2710 }

	2711

	2712 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,

	2713 const LogicVRegister& src1,

	2714 const LogicVRegister& src2) {

	2715 SimVRegister temp;

	2716 LogicVRegister product = smull(vform, temp, src1, src2);

	2717 return add(vform, dst, product, product).SignedSaturate(vform);

	2718 }

	2719

	2720 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,

	2721 const LogicVRegister& src1,

	2722 const LogicVRegister& src2) {

	2723 SimVRegister temp;

	2724 LogicVRegister product = smull2(vform, temp, src1, src2);

	2725 return add(vform, dst, product, product).SignedSaturate(vform);

	2726 }

	2727

	2728 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,

	2729 const LogicVRegister& src1,

	2730 const LogicVRegister& src2, bool round) {

	2731 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.

	2732 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)

	2733 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.

	2734

	2735 int esize = LaneSizeInBitsFromFormat(vform);

	2736 int round_const = round ? (1 << (esize - 2)) : 0;

	2737 int64_t product;

	2738

	2739 dst.ClearForWrite(vform);

	2740 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2741 product = src1.Int(vform, i) * src2.Int(vform, i);

	2742 product += round_const;

	2743 product = product >> (esize - 1);

	2744

	2745 if (product > MaxIntFromFormat(vform)) {

	2746 product = MaxIntFromFormat(vform);

	2747 } else if (product < MinIntFromFormat(vform)) {

	2748 product = MinIntFromFormat(vform);

	2749 }

	2750 dst.SetInt(vform, i, product);

	2751 }

	2752 return dst;

	2753 }

	2754

	2755 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,

	2756 const LogicVRegister& src1,

	2757 const LogicVRegister& src2) {

	2758 return sqrdmulh(vform, dst, src1, src2, false);

	2759 }

	2760

	2761 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,

	2762 const LogicVRegister& src1,

	2763 const LogicVRegister& src2) {

	2764 SimVRegister temp;

	2765 add(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2766 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2767 return dst;

	2768 }

	2769

	2770 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,

	2771 const LogicVRegister& src1,

	2772 const LogicVRegister& src2) {

	2773 SimVRegister temp;

	2774 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2775 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2776 return dst;

	2777 }

	2778

	2779 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,

	2780 const LogicVRegister& src1,

	2781 const LogicVRegister& src2) {

	2782 SimVRegister temp;

	2783 add(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2784 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2785 return dst;

	2786 }

	2787

	2788 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,

	2789 const LogicVRegister& src1,

	2790 const LogicVRegister& src2) {

	2791 SimVRegister temp;

	2792 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2793 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2794 return dst;

	2795 }

	2796

	2797 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,

	2798 const LogicVRegister& src1,

	2799 const LogicVRegister& src2) {

	2800 SimVRegister temp;

	2801 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2802 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2803 return dst;

	2804 }

	2805

	2806 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,

	2807 const LogicVRegister& src1,

	2808 const LogicVRegister& src2) {

	2809 SimVRegister temp;

	2810 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2811 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2812 return dst;

	2813 }

	2814

	2815 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,

	2816 const LogicVRegister& src1,

	2817 const LogicVRegister& src2) {

	2818 SimVRegister temp;

	2819 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2820 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2821 return dst;

	2822 }

	2823

	2824 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,

	2825 const LogicVRegister& src1,

	2826 const LogicVRegister& src2) {

	2827 SimVRegister temp;

	2828 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2829 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2830 return dst;

	2831 }

	2832

	2833 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,

	2834 const LogicVRegister& src1,

	2835 const LogicVRegister& src2) {

	2836 uint64_t result[16];

	2837 int laneCount = LaneCountFromFormat(vform);

	2838 int pairs = laneCount / 2;

	2839 for (int i = 0; i < pairs; ++i) {

	2840 result[2 * i] = src1.Uint(vform, 2 * i);

	2841 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);

	2842 }

	2843

	2844 dst.SetUintArray(vform, result);

	2845 return dst;

	2846 }

	2847

	2848 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,

	2849 const LogicVRegister& src1,

	2850 const LogicVRegister& src2) {

	2851 uint64_t result[16];

	2852 int laneCount = LaneCountFromFormat(vform);

	2853 int pairs = laneCount / 2;

	2854 for (int i = 0; i < pairs; ++i) {

	2855 result[2 * i] = src1.Uint(vform, (2 * i) + 1);

	2856 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);

	2857 }

	2858

	2859 dst.SetUintArray(vform, result);

	2860 return dst;

	2861 }

	2862

	2863 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,

	2864 const LogicVRegister& src1,

	2865 const LogicVRegister& src2) {

	2866 uint64_t result[16];

	2867 int laneCount = LaneCountFromFormat(vform);

	2868 int pairs = laneCount / 2;

	2869 for (int i = 0; i < pairs; ++i) {

	2870 result[2 * i] = src1.Uint(vform, i);

	2871 result[(2 * i) + 1] = src2.Uint(vform, i);

	2872 }

	2873

	2874 dst.SetUintArray(vform, result);

	2875 return dst;

	2876 }

	2877

	2878 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,

	2879 const LogicVRegister& src1,

	2880 const LogicVRegister& src2) {

	2881 uint64_t result[16];

	2882 int laneCount = LaneCountFromFormat(vform);

	2883 int pairs = laneCount / 2;

	2884 for (int i = 0; i < pairs; ++i) {

	2885 result[2 * i] = src1.Uint(vform, pairs + i);

	2886 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);

	2887 }

	2888

	2889 dst.SetUintArray(vform, result);

	2890 return dst;

	2891 }

	2892

	2893 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,

	2894 const LogicVRegister& src1,

	2895 const LogicVRegister& src2) {

	2896 uint64_t result[32];

	2897 int laneCount = LaneCountFromFormat(vform);

	2898 for (int i = 0; i < laneCount; ++i) {

	2899 result[i] = src1.Uint(vform, i);

	2900 result[laneCount + i] = src2.Uint(vform, i);

	2901 }

	2902

	2903 dst.ClearForWrite(vform);

	2904 for (int i = 0; i < laneCount; ++i) {

	2905 dst.SetUint(vform, i, result[2 * i]);

	2906 }

	2907 return dst;

	2908 }

	2909

	2910 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,

	2911 const LogicVRegister& src1,

	2912 const LogicVRegister& src2) {

	2913 uint64_t result[32];

	2914 int laneCount = LaneCountFromFormat(vform);

	2915 for (int i = 0; i < laneCount; ++i) {

	2916 result[i] = src1.Uint(vform, i);

	2917 result[laneCount + i] = src2.Uint(vform, i);

	2918 }

	2919

	2920 dst.ClearForWrite(vform);

	2921 for (int i = 0; i < laneCount; ++i) {

	2922 dst.SetUint(vform, i, result[(2 * i) + 1]);

	2923 }

	2924 return dst;

	2925 }

	2926

	2927 template <typename T>

	2928 T Simulator::FPAdd(T op1, T op2) {

	2929 T result = FPProcessNaNs(op1, op2);

	2930 if (std::isnan(result)) return result;

	2931

	2932 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {

	2933 // inf + -inf returns the default NaN.

	2934 FPProcessException();

	2935 return FPDefaultNaN<T>();

	2936 } else {

	2937 // Other cases should be handled by standard arithmetic.

	2938 return op1 + op2;

	2939 }

	2940 }

	2941

	2942 template <typename T>

	2943 T Simulator::FPSub(T op1, T op2) {

	2944 // NaNs should be handled elsewhere.

	2945 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	2946

	2947 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {

	2948 // inf - inf returns the default NaN.

	2949 FPProcessException();

	2950 return FPDefaultNaN<T>();

	2951 } else {

	2952 // Other cases should be handled by standard arithmetic.

	2953 return op1 - op2;

	2954 }

	2955 }

	2956

	2957 template <typename T>

	2958 T Simulator::FPMul(T op1, T op2) {

	2959 // NaNs should be handled elsewhere.

	2960 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	2961

	2962 if ((std::isinf(op1) && (op2 == 0.0)) \|\| (std::isinf(op2) && (op1 == 0.0))) {

	2963 // inf * 0.0 returns the default NaN.

	2964 FPProcessException();

	2965 return FPDefaultNaN<T>();

	2966 } else {

	2967 // Other cases should be handled by standard arithmetic.

	2968 return op1 * op2;

	2969 }

	2970 }

	2971

	2972 template <typename T>

	2973 T Simulator::FPMulx(T op1, T op2) {

	2974 if ((std::isinf(op1) && (op2 == 0.0)) \|\| (std::isinf(op2) && (op1 == 0.0))) {

	2975 // inf * 0.0 returns +/-2.0.

	2976 T two = 2.0;

	2977 return copysign(1.0, op1) * copysign(1.0, op2) * two;

	2978 }

	2979 return FPMul(op1, op2);

	2980 }

	2981

	2982 template <typename T>

	2983 T Simulator::FPMulAdd(T a, T op1, T op2) {

	2984 T result = FPProcessNaNs3(a, op1, op2);

	2985

	2986 T sign_a = copysign(1.0, a);

	2987 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);

	2988 bool isinf_prod = std::isinf(op1) \|\| std::isinf(op2);

	2989 bool operation_generates_nan =

	2990 (std::isinf(op1) && (op2 == 0.0)) \|\| // inf * 0.0

	2991 (std::isinf(op2) && (op1 == 0.0)) \|\| // 0.0 * inf

	2992 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf

	2993

	2994 if (std::isnan(result)) {

	2995 // Generated NaNs override quiet NaNs propagated from a.

	2996 if (operation_generates_nan && IsQuietNaN(a)) {

	2997 FPProcessException();

	2998 return FPDefaultNaN<T>();

	2999 } else {

	3000 return result;

	3001 }

	3002 }

	3003

	3004 // If the operation would produce a NaN, return the default NaN.

	3005 if (operation_generates_nan) {

	3006 FPProcessException();

	3007 return FPDefaultNaN<T>();

	3008 }

	3009

	3010 // Work around broken fma implementations for exact zero results: The sign of

	3011 // exact 0.0 results is positive unless both a and op1 * op2 are negative.

	3012 if (((op1 == 0.0) \|\| (op2 == 0.0)) && (a == 0.0)) {

	3013 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;

	3014 }

	3015

	3016 result = FusedMultiplyAdd(op1, op2, a);

	3017 DCHECK(!std::isnan(result));

	3018

	3019 // Work around broken fma implementations for rounded zero results: If a is

	3020 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.

	3021 if ((a == 0.0) && (result == 0.0)) {

	3022 return copysign(0.0, sign_prod);

	3023 }

	3024

	3025 return result;

	3026 }

	3027

	3028 template <typename T>

	3029 T Simulator::FPDiv(T op1, T op2) {

	3030 // NaNs should be handled elsewhere.

	3031 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	3032

	3033 if ((std::isinf(op1) && std::isinf(op2)) \|\| ((op1 == 0.0) && (op2 == 0.0))) {

	3034 // inf / inf and 0.0 / 0.0 return the default NaN.

	3035 FPProcessException();

	3036 return FPDefaultNaN<T>();

	3037 } else {

	3038 if (op2 == 0.0) {

	3039 FPProcessException();

	3040 if (!std::isnan(op1)) {

	3041 double op1_sign = copysign(1.0, op1);

	3042 double op2_sign = copysign(1.0, op2);

	3043 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);

	3044 }

	3045 }

	3046

	3047 // Other cases should be handled by standard arithmetic.

	3048 return op1 / op2;

	3049 }

	3050 }

	3051

	3052 template <typename T>

	3053 T Simulator::FPSqrt(T op) {

	3054 if (std::isnan(op)) {

	3055 return FPProcessNaN(op);

	3056 } else if (op < 0.0) {

	3057 FPProcessException();

	3058 return FPDefaultNaN<T>();

	3059 } else {

	3060 return sqrt(op);

	3061 }

	3062 }

	3063

	3064 template <typename T>

	3065 T Simulator::FPMax(T a, T b) {

	3066 T result = FPProcessNaNs(a, b);

	3067 if (std::isnan(result)) return result;

	3068

	3069 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {

	3070 // a and b are zero, and the sign differs: return +0.0.

	3071 return 0.0;

	3072 } else {

	3073 return (a > b) ? a : b;

	3074 }

	3075 }

	3076

	3077 template <typename T>

	3078 T Simulator::FPMaxNM(T a, T b) {

	3079 if (IsQuietNaN(a) && !IsQuietNaN(b)) {

	3080 a = kFP64NegativeInfinity;

	3081 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {

	3082 b = kFP64NegativeInfinity;

	3083 }

	3084

	3085 T result = FPProcessNaNs(a, b);

	3086 return std::isnan(result) ? result : FPMax(a, b);

	3087 }

	3088

	3089 template <typename T>

	3090 T Simulator::FPMin(T a, T b) {

	3091 T result = FPProcessNaNs(a, b);

	3092 if (std::isnan(result)) return result;

	3093

	3094 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {

	3095 // a and b are zero, and the sign differs: return -0.0.

	3096 return -0.0;

	3097 } else {

	3098 return (a < b) ? a : b;

	3099 }

	3100 }

	3101

	3102 template <typename T>

	3103 T Simulator::FPMinNM(T a, T b) {

	3104 if (IsQuietNaN(a) && !IsQuietNaN(b)) {

	3105 a = kFP64PositiveInfinity;

	3106 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {

	3107 b = kFP64PositiveInfinity;

	3108 }

	3109

	3110 T result = FPProcessNaNs(a, b);

	3111 return std::isnan(result) ? result : FPMin(a, b);

	3112 }

	3113

	3114 template <typename T>

	3115 T Simulator::FPRecipStepFused(T op1, T op2) {

	3116 const T two = 2.0;

	3117 if ((std::isinf(op1) && (op2 == 0.0)) \|\|

	3118 ((op1 == 0.0) && (std::isinf(op2)))) {

	3119 return two;

	3120 } else if (std::isinf(op1) \|\| std::isinf(op2)) {

	3121 // Return +inf if signs match, otherwise -inf.

	3122 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity

	3123 : kFP64NegativeInfinity;

	3124 } else {

	3125 return FusedMultiplyAdd(op1, op2, two);

	3126 }

	3127 }

	3128

	3129 template <typename T>

	3130 T Simulator::FPRSqrtStepFused(T op1, T op2) {

	3131 const T one_point_five = 1.5;

	3132 const T two = 2.0;

	3133

	3134 if ((std::isinf(op1) && (op2 == 0.0)) \|\|

	3135 ((op1 == 0.0) && (std::isinf(op2)))) {

	3136 return one_point_five;

	3137 } else if (std::isinf(op1) \|\| std::isinf(op2)) {

	3138 // Return +inf if signs match, otherwise -inf.

	3139 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity

	3140 : kFP64NegativeInfinity;

	3141 } else {

	3142 // The multiply-add-halve operation must be fully fused, so avoid interim

	3143 // rounding by checking which operand can be losslessly divided by two

	3144 // before doing the multiply-add.

	3145 if (std::isnormal(op1 / two)) {

	3146 return FusedMultiplyAdd(op1 / two, op2, one_point_five);

	3147 } else if (std::isnormal(op2 / two)) {

	3148 return FusedMultiplyAdd(op1, op2 / two, one_point_five);

	3149 } else {

	3150 // Neither operand is normal after halving: the result is dominated by

	3151 // the addition term, so just return that.

	3152 return one_point_five;

	3153 }

	3154 }

	3155 }

	3156

	3157 double Simulator::FPRoundInt(double value, FPRounding round_mode) {

	3158 if ((value == 0.0) \|\| (value == kFP64PositiveInfinity) \|\|

	3159 (value == kFP64NegativeInfinity)) {

	3160 return value;

	3161 } else if (std::isnan(value)) {

	3162 return FPProcessNaN(value);

	3163 }

	3164

	3165 double int_result = std::floor(value);

	3166 double error = value - int_result;

	3167 switch (round_mode) {

	3168 case FPTieAway: {

	3169 // Take care of correctly handling the range ]-0.5, -0.0], which must

	3170 // yield -0.0.

	3171 if ((-0.5 < value) && (value < 0.0)) {

	3172 int_result = -0.0;

	3173

	3174 } else if ((error > 0.5) \|\| ((error == 0.5) && (int_result >= 0.0))) {

	3175 // If the error is greater than 0.5, or is equal to 0.5 and the integer

	3176 // result is positive, round up.

	3177 int_result++;

	3178 }

	3179 break;

	3180 }

	3181 case FPTieEven: {

	3182 // Take care of correctly handling the range [-0.5, -0.0], which must

	3183 // yield -0.0.

	3184 if ((-0.5 <= value) && (value < 0.0)) {

	3185 int_result = -0.0;

	3186

	3187 // If the error is greater than 0.5, or is equal to 0.5 and the integer

	3188 // result is odd, round up.

	3189 } else if ((error > 0.5) \|\|

	3190 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {

	3191 int_result++;

	3192 }

	3193 break;

	3194 }

	3195 case FPZero: {

	3196 // If value>0 then we take floor(value)

	3197 // otherwise, ceil(value).

	3198 if (value < 0) {

	3199 int_result = ceil(value);

	3200 }

	3201 break;

	3202 }

	3203 case FPNegativeInfinity: {

	3204 // We always use floor(value).

	3205 break;

	3206 }

	3207 case FPPositiveInfinity: {

	3208 // Take care of correctly handling the range ]-1.0, -0.0], which must

	3209 // yield -0.0.

	3210 if ((-1.0 < value) && (value < 0.0)) {

	3211 int_result = -0.0;

	3212

	3213 // If the error is non-zero, round up.

	3214 } else if (error > 0.0) {

	3215 int_result++;

	3216 }

	3217 break;

	3218 }

	3219 default:

	3220 UNIMPLEMENTED();

	3221 }

	3222 return int_result;

	3223 }

	3224

	3225 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {

	3226 value = FPRoundInt(value, rmode);

	3227 if (value >= kWMaxInt) {

	3228 return kWMaxInt;

	3229 } else if (value < kWMinInt) {

	3230 return kWMinInt;

	3231 }

	3232 return std::isnan(value) ? 0 : static_cast<int32_t>(value);

	3233 }

	3234

	3235 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {

	3236 value = FPRoundInt(value, rmode);

	3237 if (value >= kXMaxInt) {

	3238 return kXMaxInt;

	3239 } else if (value < kXMinInt) {

	3240 return kXMinInt;

	3241 }

	3242 return std::isnan(value) ? 0 : static_cast<int64_t>(value);

	3243 }

	3244

	3245 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {

	3246 value = FPRoundInt(value, rmode);

	3247 if (value >= kWMaxUInt) {

	3248 return kWMaxUInt;

	3249 } else if (value < 0.0) {

	3250 return 0;

	3251 }

	3252 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);

	3253 }

	3254

	3255 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {

	3256 value = FPRoundInt(value, rmode);

	3257 if (value >= kXMaxUInt) {

	3258 return kXMaxUInt;

	3259 } else if (value < 0.0) {

	3260 return 0;

	3261 }

	3262 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);

	3263 }

	3264

	3265 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \

	3266 template <typename T> \

	3267 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \

	3268 const LogicVRegister& src1, \

	3269 const LogicVRegister& src2) { \

	3270 dst.ClearForWrite(vform); \

	3271 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \

	3272 T op1 = src1.Float<T>(i); \

	3273 T op2 = src2.Float<T>(i); \

	3274 T result; \

	3275 if (PROCNAN) { \

	3276 result = FPProcessNaNs(op1, op2); \

	3277 if (!std::isnan(result)) { \

	3278 result = OP(op1, op2); \

	3279 } \

	3280 } else { \

	3281 result = OP(op1, op2); \

	3282 } \

	3283 dst.SetFloat(i, result); \

	3284 } \

	3285 return dst; \

	3286 } \

	3287 \

	3288 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \

	3289 const LogicVRegister& src1, \

	3290 const LogicVRegister& src2) { \

	3291 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \

	3292 FN<float>(vform, dst, src1, src2); \

	3293 } else { \

	3294 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \

	3295 FN<double>(vform, dst, src1, src2); \

	3296 } \

	3297 return dst; \

	3298 }

	3299 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)

	3300 #undef DEFINE_NEON_FP_VECTOR_OP

	3301

	3302 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,

	3303 const LogicVRegister& src1,

	3304 const LogicVRegister& src2) {

	3305 SimVRegister temp;

	3306 LogicVRegister product = fmul(vform, temp, src1, src2);

	3307 return fneg(vform, dst, product);

	3308 }

	3309

	3310 template <typename T>

	3311 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,

	3312 const LogicVRegister& src1,

	3313 const LogicVRegister& src2) {

	3314 dst.ClearForWrite(vform);

	3315 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3316 T op1 = -src1.Float<T>(i);

	3317 T op2 = src2.Float<T>(i);

	3318 T result = FPProcessNaNs(op1, op2);

	3319 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));

	3320 }

	3321 return dst;

	3322 }

	3323

	3324 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,

	3325 const LogicVRegister& src1,

	3326 const LogicVRegister& src2) {

	3327 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3328 frecps<float>(vform, dst, src1, src2);

	3329 } else {

	3330 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3331 frecps<double>(vform, dst, src1, src2);

	3332 }

	3333 return dst;

	3334 }

	3335

	3336 template <typename T>

	3337 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,

	3338 const LogicVRegister& src1,

	3339 const LogicVRegister& src2) {

	3340 dst.ClearForWrite(vform);

	3341 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3342 T op1 = -src1.Float<T>(i);

	3343 T op2 = src2.Float<T>(i);

	3344 T result = FPProcessNaNs(op1, op2);

	3345 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));

	3346 }

	3347 return dst;

	3348 }

	3349

	3350 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,

	3351 const LogicVRegister& src1,

	3352 const LogicVRegister& src2) {

	3353 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3354 frsqrts<float>(vform, dst, src1, src2);

	3355 } else {

	3356 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3357 frsqrts<double>(vform, dst, src1, src2);

	3358 }

	3359 return dst;

	3360 }

	3361

	3362 template <typename T>

	3363 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,

	3364 const LogicVRegister& src1,

	3365 const LogicVRegister& src2, Condition cond) {

	3366 dst.ClearForWrite(vform);

	3367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3368 bool result = false;

	3369 T op1 = src1.Float<T>(i);

	3370 T op2 = src2.Float<T>(i);

	3371 T nan_result = FPProcessNaNs(op1, op2);

	3372 if (!std::isnan(nan_result)) {

	3373 switch (cond) {

	3374 case eq:

	3375 result = (op1 == op2);

	3376 break;

	3377 case ge:

	3378 result = (op1 >= op2);

	3379 break;

	3380 case gt:

	3381 result = (op1 > op2);

	3382 break;

	3383 case le:

	3384 result = (op1 <= op2);

	3385 break;

	3386 case lt:

	3387 result = (op1 < op2);

	3388 break;

	3389 default:

	3390 UNREACHABLE();

	3391 break;

	3392 }

	3393 }

	3394 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);

	3395 }

	3396 return dst;

	3397 }

	3398

	3399 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,

	3400 const LogicVRegister& src1,

	3401 const LogicVRegister& src2, Condition cond) {

	3402 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3403 fcmp<float>(vform, dst, src1, src2, cond);

	3404 } else {

	3405 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3406 fcmp<double>(vform, dst, src1, src2, cond);

	3407 }

	3408 return dst;

	3409 }

	3410

	3411 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,

	3412 const LogicVRegister& src, Condition cond) {

	3413 SimVRegister temp;

	3414 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3415 LogicVRegister zero_reg =

	3416 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));

	3417 fcmp<float>(vform, dst, src, zero_reg, cond);

	3418 } else {

	3419 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3420 LogicVRegister zero_reg =

	3421 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));

	3422 fcmp<double>(vform, dst, src, zero_reg, cond);

	3423 }

	3424 return dst;

	3425 }

	3426

	3427 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,

	3428 const LogicVRegister& src1,

	3429 const LogicVRegister& src2, Condition cond) {

	3430 SimVRegister temp1, temp2;

	3431 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3432 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);

	3433 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);

	3434 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);

	3435 } else {

	3436 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3437 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);

	3438 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);

	3439 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);

	3440 }

	3441 return dst;

	3442 }

	3443

	3444 template <typename T>

	3445 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3446 const LogicVRegister& src1,

	3447 const LogicVRegister& src2) {

	3448 dst.ClearForWrite(vform);

	3449 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3450 T op1 = src1.Float<T>(i);

	3451 T op2 = src2.Float<T>(i);

	3452 T acc = dst.Float<T>(i);

	3453 T result = FPMulAdd(acc, op1, op2);

	3454 dst.SetFloat(i, result);

	3455 }

	3456 return dst;

	3457 }

	3458

	3459 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3460 const LogicVRegister& src1,

	3461 const LogicVRegister& src2) {

	3462 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3463 fmla<float>(vform, dst, src1, src2);

	3464 } else {

	3465 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3466 fmla<double>(vform, dst, src1, src2);

	3467 }

	3468 return dst;

	3469 }

	3470

	3471 template <typename T>

	3472 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3473 const LogicVRegister& src1,

	3474 const LogicVRegister& src2) {

	3475 dst.ClearForWrite(vform);

	3476 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3477 T op1 = -src1.Float<T>(i);

	3478 T op2 = src2.Float<T>(i);

	3479 T acc = dst.Float<T>(i);

	3480 T result = FPMulAdd(acc, op1, op2);

	3481 dst.SetFloat(i, result);

	3482 }

	3483 return dst;

	3484 }

	3485

	3486 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3487 const LogicVRegister& src1,

	3488 const LogicVRegister& src2) {

	3489 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3490 fmls<float>(vform, dst, src1, src2);

	3491 } else {

	3492 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3493 fmls<double>(vform, dst, src1, src2);

	3494 }

	3495 return dst;

	3496 }

	3497

	3498 template <typename T>

	3499 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,

	3500 const LogicVRegister& src) {

	3501 dst.ClearForWrite(vform);

	3502 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3503 T op = src.Float<T>(i);

	3504 op = -op;

	3505 dst.SetFloat(i, op);

	3506 }

	3507 return dst;

	3508 }

	3509

	3510 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,

	3511 const LogicVRegister& src) {

	3512 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3513 fneg<float>(vform, dst, src);

	3514 } else {

	3515 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3516 fneg<double>(vform, dst, src);

	3517 }

	3518 return dst;

	3519 }

	3520

	3521 template <typename T>

	3522 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,

	3523 const LogicVRegister& src) {

	3524 dst.ClearForWrite(vform);

	3525 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3526 T op = src.Float<T>(i);

	3527 if (copysign(1.0, op) < 0.0) {

	3528 op = -op;

	3529 }

	3530 dst.SetFloat(i, op);

	3531 }

	3532 return dst;

	3533 }

	3534

	3535 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,

	3536 const LogicVRegister& src) {

	3537 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3538 fabs_<float>(vform, dst, src);

	3539 } else {

	3540 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3541 fabs_<double>(vform, dst, src);

	3542 }

	3543 return dst;

	3544 }

	3545

	3546 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,

	3547 const LogicVRegister& src1,

	3548 const LogicVRegister& src2) {

	3549 SimVRegister temp;

	3550 fsub(vform, temp, src1, src2);

	3551 fabs_(vform, dst, temp);

	3552 return dst;

	3553 }

	3554

	3555 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,

	3556 const LogicVRegister& src) {

	3557 dst.ClearForWrite(vform);

	3558 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3559 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3560 float result = FPSqrt(src.Float<float>(i));

	3561 dst.SetFloat(i, result);

	3562 }

	3563 } else {

	3564 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3565 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3566 double result = FPSqrt(src.Float<double>(i));

	3567 dst.SetFloat(i, result);

	3568 }

	3569 }

	3570 return dst;

	3571 }

	3572

	3573 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \

	3574 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \

	3575 const LogicVRegister& src1, \

	3576 const LogicVRegister& src2) { \

	3577 SimVRegister temp1, temp2; \

	3578 uzp1(vform, temp1, src1, src2); \

	3579 uzp2(vform, temp2, src1, src2); \

	3580 FN(vform, dst, temp1, temp2); \

	3581 return dst; \

	3582 } \

	3583 \

	3584 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \

	3585 const LogicVRegister& src) { \

	3586 if (vform == kFormatS) { \

	3587 float result = OP(src.Float<float>(0), src.Float<float>(1)); \

	3588 dst.SetFloat(0, result); \

	3589 } else { \

	3590 DCHECK_EQ(vform, kFormatD); \

	3591 double result = OP(src.Float<double>(0), src.Float<double>(1)); \

	3592 dst.SetFloat(0, result); \

	3593 } \

	3594 dst.ClearForWrite(vform); \

	3595 return dst; \

	3596 }

	3597 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)

	3598 #undef DEFINE_NEON_FP_PAIR_OP

	3599

	3600 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,

	3601 const LogicVRegister& src, FPMinMaxOp Op) {

	3602 DCHECK_EQ(vform, kFormat4S);

	3603 USE(vform);

	3604 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));

	3605 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));

	3606 float result = (this->*Op)(result1, result2);

	3607 dst.ClearForWrite(kFormatS);

	3608 dst.SetFloat<float>(0, result);

	3609 return dst;

	3610 }

	3611

	3612 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,

	3613 const LogicVRegister& src) {

	3614 return FMinMaxV(vform, dst, src, &Simulator::FPMax);

	3615 }

	3616

	3617 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,

	3618 const LogicVRegister& src) {

	3619 return FMinMaxV(vform, dst, src, &Simulator::FPMin);

	3620 }

	3621

	3622 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,

	3623 const LogicVRegister& src) {

	3624 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);

	3625 }

	3626

	3627 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,

	3628 const LogicVRegister& src) {

	3629 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);

	3630 }

	3631

	3632 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,

	3633 const LogicVRegister& src1,

	3634 const LogicVRegister& src2, int index) {

	3635 dst.ClearForWrite(vform);

	3636 SimVRegister temp;

	3637 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3638 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3639 fmul<float>(vform, dst, src1, index_reg);

	3640 } else {

	3641 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3642 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3643 fmul<double>(vform, dst, src1, index_reg);

	3644 }

	3645 return dst;

	3646 }

	3647

	3648 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3649 const LogicVRegister& src1,

	3650 const LogicVRegister& src2, int index) {

	3651 dst.ClearForWrite(vform);

	3652 SimVRegister temp;

	3653 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3654 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3655 fmla<float>(vform, dst, src1, index_reg);

	3656 } else {

	3657 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3658 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3659 fmla<double>(vform, dst, src1, index_reg);

	3660 }

	3661 return dst;

	3662 }

	3663

	3664 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3665 const LogicVRegister& src1,

	3666 const LogicVRegister& src2, int index) {

	3667 dst.ClearForWrite(vform);

	3668 SimVRegister temp;

	3669 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3670 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3671 fmls<float>(vform, dst, src1, index_reg);

	3672 } else {

	3673 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3674 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3675 fmls<double>(vform, dst, src1, index_reg);

	3676 }

	3677 return dst;

	3678 }

	3679

	3680 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,

	3681 const LogicVRegister& src1,

	3682 const LogicVRegister& src2, int index) {

	3683 dst.ClearForWrite(vform);

	3684 SimVRegister temp;

	3685 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3686 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3687 fmulx<float>(vform, dst, src1, index_reg);

	3688

	3689 } else {

	3690 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3691 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3692 fmulx<double>(vform, dst, src1, index_reg);

	3693 }

	3694 return dst;

	3695 }

	3696

	3697 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,

	3698 const LogicVRegister& src,

	3699 FPRounding rounding_mode,

	3700 bool inexact_exception) {

	3701 dst.ClearForWrite(vform);

	3702 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3703 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3704 float input = src.Float<float>(i);

	3705 float rounded = FPRoundInt(input, rounding_mode);

	3706 if (inexact_exception && !std::isnan(input) && (input != rounded)) {

	3707 FPProcessException();

	3708 }

	3709 dst.SetFloat<float>(i, rounded);

	3710 }

	3711 } else {

	3712 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3713 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3714 double input = src.Float<double>(i);

	3715 double rounded = FPRoundInt(input, rounding_mode);

	3716 if (inexact_exception && !std::isnan(input) && (input != rounded)) {

	3717 FPProcessException();

	3718 }

	3719 dst.SetFloat<double>(i, rounded);

	3720 }

	3721 }

	3722 return dst;

	3723 }

	3724

	3725 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,

	3726 const LogicVRegister& src,

	3727 FPRounding rounding_mode, int fbits) {

	3728 dst.ClearForWrite(vform);

	3729 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3730 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3731 float op = src.Float<float>(i) * std::pow(2.0f, fbits);

	3732 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));

	3733 }

	3734 } else {

	3735 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3736 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3737 double op = src.Float<double>(i) * std::pow(2.0, fbits);

	3738 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));

	3739 }

	3740 }

	3741 return dst;

	3742 }

	3743

	3744 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,

	3745 const LogicVRegister& src,

	3746 FPRounding rounding_mode, int fbits) {

	3747 dst.ClearForWrite(vform);

	3748 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3749 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3750 float op = src.Float<float>(i) * std::pow(2.0f, fbits);

	3751 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));

	3752 }

	3753 } else {

	3754 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3755 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3756 double op = src.Float<double>(i) * std::pow(2.0, fbits);

	3757 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));

	3758 }

	3759 }

	3760 return dst;

	3761 }

	3762

	3763 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,

	3764 const LogicVRegister& src) {

	3765 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3766 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {

	3767 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));

	3768 }

	3769 } else {

	3770 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3771 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {

	3772 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));

	3773 }

	3774 }

	3775 return dst;

	3776 }

	3777

	3778 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,

	3779 const LogicVRegister& src) {

	3780 int lane_count = LaneCountFromFormat(vform);

	3781 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3782 for (int i = 0; i < lane_count; i++) {

	3783 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));

	3784 }

	3785 } else {

	3786 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3787 for (int i = 0; i < lane_count; i++) {

	3788 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));

	3789 }

	3790 }

	3791 return dst;

	3792 }

	3793

	3794 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,

	3795 const LogicVRegister& src) {

	3796 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {

	3797 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3798 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));

	3799 }

	3800 } else {

	3801 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3802 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3803 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));

	3804 }

	3805 }

	3806 return dst;

	3807 }

	3808

	3809 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,

	3810 const LogicVRegister& src) {

	3811 int lane_count = LaneCountFromFormat(vform) / 2;

	3812 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {

	3813 for (int i = lane_count - 1; i >= 0; i--) {

	3814 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));

	3815 }

	3816 } else {

	3817 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3818 for (int i = lane_count - 1; i >= 0; i--) {

	3819 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));

	3820 }

	3821 }

	3822 return dst;

	3823 }

	3824

	3825 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,

	3826 const LogicVRegister& src) {

	3827 dst.ClearForWrite(vform);

	3828 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3829 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3830 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));

	3831 }

	3832 return dst;

	3833 }

	3834

	3835 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,

	3836 const LogicVRegister& src) {

	3837 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3838 int lane_count = LaneCountFromFormat(vform) / 2;

	3839 for (int i = lane_count - 1; i >= 0; i--) {

	3840 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));

	3841 }

	3842 return dst;

	3843 }

	3844

	3845 // Based on reference C function recip_sqrt_estimate from ARM ARM.

	3846 double Simulator::recip_sqrt_estimate(double a) {

	3847 int q0, q1, s;

	3848 double r;

	3849 if (a < 0.5) {

	3850 q0 = static_cast<int>(a * 512.0);

	3851 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);

	3852 } else {

	3853 q1 = static_cast<int>(a * 256.0);

	3854 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);

	3855 }

	3856 s = static_cast<int>(256.0 * r + 0.5);

	3857 return static_cast<double>(s) / 256.0;

	3858 }

	3859

	3860 namespace {

	3861

	3862 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {

	3863 return unsigned_bitextract_64(start_bit, end_bit, val);

	3864 }

	3865

	3866 } // anonymous namespace

	3867

	3868 template <typename T>

	3869 T Simulator::FPRecipSqrtEstimate(T op) {

	3870 static_assert(std::is_same<float, T>::value \|\| std::is_same<double, T>::value,

	3871 "T must be a float or double");

	3872

	3873 if (std::isnan(op)) {

	3874 return FPProcessNaN(op);

	3875 } else if (op == 0.0) {

	3876 if (copysign(1.0, op) < 0.0) {

	3877 return kFP64NegativeInfinity;

	3878 } else {

	3879 return kFP64PositiveInfinity;

	3880 }

	3881 } else if (copysign(1.0, op) < 0.0) {

	3882 FPProcessException();

	3883 return FPDefaultNaN<T>();

	3884 } else if (std::isinf(op)) {

	3885 return 0.0;

	3886 } else {

	3887 uint64_t fraction;

	3888 int32_t exp, result_exp;

	3889

	3890 if (sizeof(T) == sizeof(float)) {

	3891 exp = static_cast<int32_t>(float_exp(op));

	3892 fraction = float_mantissa(op);

	3893 fraction <<= 29;

	3894 } else {

	3895 exp = static_cast<int32_t>(double_exp(op));

	3896 fraction = double_mantissa(op);

	3897 }

	3898

	3899 if (exp == 0) {

	3900 while (Bits(fraction, 51, 51) == 0) {

	3901 fraction = Bits(fraction, 50, 0) << 1;

	3902 exp -= 1;

	3903 }

	3904 fraction = Bits(fraction, 50, 0) << 1;

	3905 }

	3906

	3907 double scaled;

	3908 if (Bits(exp, 0, 0) == 0) {

	3909 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);

	3910 } else {

	3911 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);

	3912 }

	3913

	3914 if (sizeof(T) == sizeof(float)) {

	3915 result_exp = (380 - exp) / 2;

	3916 } else {

	3917 result_exp = (3068 - exp) / 2;

	3918 }

	3919

	3920 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));

	3921

	3922 if (sizeof(T) == sizeof(float)) {

	3923 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));

	3924 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));

	3925 return float_pack(0, exp_bits, est_bits);

	3926 } else {

	3927 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));

	3928 }

	3929 }

	3930 }

	3931

	3932 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,

	3933 const LogicVRegister& src) {

	3934 dst.ClearForWrite(vform);

	3935 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3936 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3937 float input = src.Float<float>(i);

	3938 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));

	3939 }

	3940 } else {

	3941 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3942 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3943 double input = src.Float<double>(i);

	3944 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));

	3945 }

	3946 }

	3947 return dst;

	3948 }

	3949

	3950 template <typename T>

	3951 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {

	3952 static_assert(std::is_same<float, T>::value \|\| std::is_same<double, T>::value,

	3953 "T must be a float or double");

	3954 uint32_t sign;

	3955

	3956 if (sizeof(T) == sizeof(float)) {

	3957 sign = float_sign(op);

	3958 } else {

	3959 sign = double_sign(op);

	3960 }

	3961

	3962 if (std::isnan(op)) {

	3963 return FPProcessNaN(op);

	3964 } else if (std::isinf(op)) {

	3965 return (sign == 1) ? -0.0 : 0.0;

	3966 } else if (op == 0.0) {

	3967 FPProcessException(); // FPExc_DivideByZero exception.

	3968 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;

	3969 } else if (((sizeof(T) == sizeof(float)) &&

	3970 (std::fabs(op) < std::pow(2.0, -128.0))) \|\|

	3971 ((sizeof(T) == sizeof(double)) &&

	3972 (std::fabs(op) < std::pow(2.0, -1024.0)))) {

	3973 bool overflow_to_inf = false;

	3974 switch (rounding) {

	3975 case FPTieEven:

	3976 overflow_to_inf = true;

	3977 break;

	3978 case FPPositiveInfinity:

	3979 overflow_to_inf = (sign == 0);

	3980 break;

	3981 case FPNegativeInfinity:

	3982 overflow_to_inf = (sign == 1);

	3983 break;

	3984 case FPZero:

	3985 overflow_to_inf = false;

	3986 break;

	3987 default:

	3988 break;

	3989 }

	3990 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.

	3991 if (overflow_to_inf) {

	3992 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;

	3993 } else {

	3994 // Return FPMaxNormal(sign).

	3995 if (sizeof(T) == sizeof(float)) {

	3996 return float_pack(sign, 0xfe, 0x07fffff);

	3997 } else {

	3998 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);

	3999 }

	4000 }

	4001 } else {

	4002 uint64_t fraction;

	4003 int32_t exp, result_exp;

	4004 uint32_t sign;

	4005

	4006 if (sizeof(T) == sizeof(float)) {

	4007 sign = float_sign(op);

	4008 exp = static_cast<int32_t>(float_exp(op));

	4009 fraction = float_mantissa(op);

	4010 fraction <<= 29;

	4011 } else {

	4012 sign = double_sign(op);

	4013 exp = static_cast<int32_t>(double_exp(op));

	4014 fraction = double_mantissa(op);

	4015 }

	4016

	4017 if (exp == 0) {

	4018 if (Bits(fraction, 51, 51) == 0) {

	4019 exp -= 1;

	4020 fraction = Bits(fraction, 49, 0) << 2;

	4021 } else {

	4022 fraction = Bits(fraction, 50, 0) << 1;

	4023 }

	4024 }

	4025

	4026 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);

	4027

	4028 if (sizeof(T) == sizeof(float)) {

	4029 result_exp = 253 - exp;

	4030 } else {

	4031 result_exp = 2045 - exp;

	4032 }

	4033

	4034 double estimate = recip_estimate(scaled);

	4035

	4036 fraction = double_mantissa(estimate);

	4037 if (result_exp == 0) {

	4038 fraction = (UINT64_C(1) << 51) \| Bits(fraction, 51, 1);

	4039 } else if (result_exp == -1) {

	4040 fraction = (UINT64_C(1) << 50) \| Bits(fraction, 51, 2);

	4041 result_exp = 0;

	4042 }

	4043 if (sizeof(T) == sizeof(float)) {

	4044 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));

	4045 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));

	4046 return float_pack(sign, exp_bits, frac_bits);

	4047 } else {

	4048 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));

	4049 }

	4050 }

	4051 }

	4052

	4053 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,

	4054 const LogicVRegister& src, FPRounding round) {

	4055 dst.ClearForWrite(vform);

	4056 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4057 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4058 float input = src.Float<float>(i);

	4059 dst.SetFloat(i, FPRecipEstimate<float>(input, round));

	4060 }

	4061 } else {

	4062 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4063 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4064 double input = src.Float<double>(i);

	4065 dst.SetFloat(i, FPRecipEstimate<double>(input, round));

	4066 }

	4067 }

	4068 return dst;

	4069 }

	4070

	4071 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,

	4072 const LogicVRegister& src) {

	4073 dst.ClearForWrite(vform);

	4074 uint64_t operand;

	4075 uint32_t result;

	4076 double dp_operand, dp_result;

	4077 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4078 operand = src.Uint(vform, i);

	4079 if (operand <= 0x3FFFFFFF) {

	4080 result = 0xFFFFFFFF;

	4081 } else {

	4082 dp_operand = operand * std::pow(2.0, -32);

	4083 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);

	4084 result = static_cast<uint32_t>(dp_result);

	4085 }

	4086 dst.SetUint(vform, i, result);

	4087 }

	4088 return dst;

	4089 }

	4090

	4091 // Based on reference C function recip_estimate from ARM ARM.

	4092 double Simulator::recip_estimate(double a) {

	4093 int q, s;

	4094 double r;

	4095 q = static_cast<int>(a * 512.0);

	4096 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);

	4097 s = static_cast<int>(256.0 * r + 0.5);

	4098 return static_cast<double>(s) / 256.0;

	4099 }

	4100

	4101 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,

	4102 const LogicVRegister& src) {

	4103 dst.ClearForWrite(vform);

	4104 uint64_t operand;

	4105 uint32_t result;

	4106 double dp_operand, dp_result;

	4107 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4108 operand = src.Uint(vform, i);

	4109 if (operand <= 0x7FFFFFFF) {

	4110 result = 0xFFFFFFFF;

	4111 } else {

	4112 dp_operand = operand * std::pow(2.0, -32);

	4113 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);

	4114 result = static_cast<uint32_t>(dp_result);

	4115 }

	4116 dst.SetUint(vform, i, result);

	4117 }

	4118 return dst;

	4119 }

	4120

	4121 template <typename T>

	4122 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,

	4123 const LogicVRegister& src) {

	4124 dst.ClearForWrite(vform);

	4125 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4126 T op = src.Float<T>(i);

	4127 T result;

	4128 if (std::isnan(op)) {

	4129 result = FPProcessNaN(op);

	4130 } else {

	4131 int exp;

	4132 uint32_t sign;

	4133 if (sizeof(T) == sizeof(float)) {

	4134 sign = float_sign(op);

	4135 exp = static_cast<int>(float_exp(op));

	4136 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));

	4137 result = float_pack(sign, exp, 0);

	4138 } else {

	4139 sign = double_sign(op);

	4140 exp = static_cast<int>(double_exp(op));

	4141 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));

	4142 result = double_pack(sign, exp, 0);

	4143 }

	4144 }

	4145 dst.SetFloat(i, result);

	4146 }

	4147 return dst;

	4148 }

	4149

	4150 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,

	4151 const LogicVRegister& src) {

	4152 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4153 frecpx<float>(vform, dst, src);

	4154 } else {

	4155 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4156 frecpx<double>(vform, dst, src);

	4157 }

	4158 return dst;

	4159 }

	4160

	4161 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,

	4162 const LogicVRegister& src, int fbits,

	4163 FPRounding round) {

	4164 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4165 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4166 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);

	4167 dst.SetFloat<float>(i, result);

	4168 } else {

	4169 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4170 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);

	4171 dst.SetFloat<double>(i, result);

	4172 }

	4173 }

	4174 return dst;

	4175 }

	4176

	4177 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,

	4178 const LogicVRegister& src, int fbits,

	4179 FPRounding round) {

	4180 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4181 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4182 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);

	4183 dst.SetFloat<float>(i, result);

	4184 } else {

	4185 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4186 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);

	4187 dst.SetFloat<double>(i, result);

	4188 }

	4189 }

	4190 return dst;

	4191 }

	4192

	4193 #endif // USE_SIMULATOR

	4194

	4195 } // namespace internal

	4196 } // namespace v8

	4197

	4198 #endif // V8_TARGET_ARCH_ARM64

OLD	NEW

« no previous file with comments | « src/arm64/simulator-arm64.cc ('k') | src/arm64/utils-arm64.h » ('j') | no next file with comments »