src/arm64/simulator-logic-arm64.cc - Issue 2622643005: ARM64: Add NEON support

Side by Side Diff: src/arm64/simulator-logic-arm64.cc

Issue 2622643005: ARM64: Add NEON support (Closed)

Patch Set: Restore AreConsecutive change Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright 2016 the V8 project authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #if V8_TARGET_ARCH_ARM64

	6

	7 #include <cmath>

	8 #include "src/arm64/simulator-arm64.h"

	9

	10 namespace v8 {

	11 namespace internal {

	12

	13 #if defined(USE_SIMULATOR)

	14

	15 namespace {

	16

	17 // See FPRound for a description of this function.

	18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,

	19 FPRounding round_mode) {

	20 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(

	21 sign, exponent, mantissa, round_mode);

	22 return bit_cast<double>(bits);

	23 }

	24

	25 // See FPRound for a description of this function.

	26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,

	27 FPRounding round_mode) {

	28 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(

	29 sign, exponent, mantissa, round_mode);

	30 return bit_cast<float>(bits);

	31 }

	32

	33 // See FPRound for a description of this function.

	34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,

	35 uint64_t mantissa, FPRounding round_mode) {

	36 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(

	37 sign, exponent, mantissa, round_mode);

	38 }

	39

	40 } // namespace

	41

	42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {

	43 if (src >= 0) {

	44 return UFixedToDouble(src, fbits, round);

	45 } else if (src == INT64_MIN) {

	46 return -UFixedToDouble(src, fbits, round);

	47 } else {

	48 return -UFixedToDouble(-src, fbits, round);

	49 }

	50 }

	51

	52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {

	53 // An input of 0 is a special case because the result is effectively

	54 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.

	55 if (src == 0) {

	56 return 0.0;

	57 }

	58

	59 // Calculate the exponent. The highest significant bit will have the value

	60 // 2^exponent.

	61 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);

	62 const int64_t exponent = highest_significant_bit - fbits;

	63

	64 return FPRoundToDouble(0, exponent, src, round);

	65 }

	66

	67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {

	68 if (src >= 0) {

	69 return UFixedToFloat(src, fbits, round);

	70 } else if (src == INT64_MIN) {

	71 return -UFixedToFloat(src, fbits, round);

	72 } else {

	73 return -UFixedToFloat(-src, fbits, round);

	74 }

	75 }

	76

	77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {

	78 // An input of 0 is a special case because the result is effectively

	79 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.

	80 if (src == 0) {

	81 return 0.0f;

	82 }

	83

	84 // Calculate the exponent. The highest significant bit will have the value

	85 // 2^exponent.

	86 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);

	87 const int32_t exponent = highest_significant_bit - fbits;

	88

	89 return FPRoundToFloat(0, exponent, src, round);

	90 }

	91

	92 double Simulator::FPToDouble(float value) {

	93 switch (std::fpclassify(value)) {

	94 case FP_NAN: {

	95 if (IsSignallingNaN(value)) {

	96 FPProcessException();

	97 }

	98 if (DN()) return kFP64DefaultNaN;

	99

	100 // Convert NaNs as the processor would:

	101 // - The sign is propagated.

	102 // - The mantissa is transferred entirely, except that the top bit is

	103 // forced to '1', making the result a quiet NaN. The unused (low-order)

	104 // mantissa bits are set to 0.

	105 uint32_t raw = bit_cast<uint32_t>(value);

	106

	107 uint64_t sign = raw >> 31;

	108 uint64_t exponent = (1 << kDoubleExponentBits) - 1;

	109 uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);

	110

	111 // Unused low-order bits remain zero.

	112 mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);

	113

	114 // Force a quiet NaN.

	115 mantissa \|= (UINT64_C(1) << (kDoubleMantissaBits - 1));

	116

	117 return double_pack(sign, exponent, mantissa);

	118 }

	119

	120 case FP_ZERO:

	121 case FP_NORMAL:

	122 case FP_SUBNORMAL:

	123 case FP_INFINITE: {

	124 // All other inputs are preserved in a standard cast, because every value

	125 // representable using an IEEE-754 float is also representable using an

	126 // IEEE-754 double.

	127 return static_cast<double>(value);

	128 }

	129 }

	130

	131 UNREACHABLE();

	132 return kFP64DefaultNaN;

	133 }

	134

	135 float Simulator::FPToFloat(float16 value) {

	136 uint32_t sign = value >> 15;

	137 uint32_t exponent =

	138 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,

	139 kFloat16MantissaBits, value);

	140 uint32_t mantissa =

	141 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);

	142

	143 switch (float16classify(value)) {

	144 case FP_ZERO:

	145 return (sign == 0) ? 0.0f : -0.0f;

	146

	147 case FP_INFINITE:

	148 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;

	149

	150 case FP_SUBNORMAL: {

	151 // Calculate shift required to put mantissa into the most-significant bits

	152 // of the destination mantissa.

	153 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);

	154

	155 // Shift mantissa and discard implicit '1'.

	156 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;

	157 mantissa &= (1 << kFloatMantissaBits) - 1;

	158

	159 // Adjust the exponent for the shift applied, and rebias.

	160 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);

	161 break;

	162 }

	163

	164 case FP_NAN: {

	165 if (IsSignallingNaN(value)) {

	166 FPProcessException();

	167 }

	168 if (DN()) return kFP32DefaultNaN;

	169

	170 // Convert NaNs as the processor would:

	171 // - The sign is propagated.

	172 // - The mantissa is transferred entirely, except that the top bit is

	173 // forced to '1', making the result a quiet NaN. The unused (low-order)

	174 // mantissa bits are set to 0.

	175 exponent = (1 << kFloatExponentBits) - 1;

	176

	177 // Increase bits in mantissa, making low-order bits 0.

	178 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

	179 mantissa \|= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.

	180 break;

	181 }

	182

	183 case FP_NORMAL: {

	184 // Increase bits in mantissa, making low-order bits 0.

	185 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

	186

	187 // Change exponent bias.

	188 exponent += (kFloatExponentBias - kFloat16ExponentBias);

	189 break;

	190 }

	191

	192 default:

	193 UNREACHABLE();

	194 return kFP32DefaultNaN;

	195 }

	196 return float_pack(sign, exponent, mantissa);

	197 }

	198

	199 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {

	200 // Only the FPTieEven rounding mode is implemented.

	201 DCHECK_EQ(round_mode, FPTieEven);

	202 USE(round_mode);

	203

	204 int64_t sign = float_sign(value);

	205 int64_t exponent =

	206 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;

	207 uint32_t mantissa = float_mantissa(value);

	208

	209 switch (std::fpclassify(value)) {

	210 case FP_NAN: {

	211 if (IsSignallingNaN(value)) {

	212 FPProcessException();

	213 }

	214 if (DN()) return kFP16DefaultNaN;

	215

	216 // Convert NaNs as the processor would:

	217 // - The sign is propagated.

	218 // - The mantissa is transferred as much as possible, except that the top

	219 // bit is forced to '1', making the result a quiet NaN.

	220 float16 result =

	221 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	222 result \|= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);

	223 result \|= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;

	224 return result;

	225 }

	226

	227 case FP_ZERO:

	228 return (sign == 0) ? 0 : 0x8000;

	229

	230 case FP_INFINITE:

	231 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	232

	233 case FP_NORMAL:

	234 case FP_SUBNORMAL: {

	235 // Convert float-to-half as the processor would, assuming that FPCR.FZ

	236 // (flush-to-zero) is not set.

	237

	238 // Add the implicit '1' bit to the mantissa.

	239 mantissa += (1 << kFloatMantissaBits);

	240 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);

	241 }

	242 }

	243

	244 UNREACHABLE();

	245 return kFP16DefaultNaN;

	246 }

	247

	248 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {

	249 // Only the FPTieEven rounding mode is implemented.

	250 DCHECK_EQ(round_mode, FPTieEven);

	251 USE(round_mode);

	252

	253 int64_t sign = double_sign(value);

	254 int64_t exponent =

	255 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;

	256 uint64_t mantissa = double_mantissa(value);

	257

	258 switch (std::fpclassify(value)) {

	259 case FP_NAN: {

	260 if (IsSignallingNaN(value)) {

	261 FPProcessException();

	262 }

	263 if (DN()) return kFP16DefaultNaN;

	264

	265 // Convert NaNs as the processor would:

	266 // - The sign is propagated.

	267 // - The mantissa is transferred as much as possible, except that the top

	268 // bit is forced to '1', making the result a quiet NaN.

	269 float16 result =

	270 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	271 result \|= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);

	272 result \|= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;

	273 return result;

	274 }

	275

	276 case FP_ZERO:

	277 return (sign == 0) ? 0 : 0x8000;

	278

	279 case FP_INFINITE:

	280 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	281

	282 case FP_NORMAL:

	283 case FP_SUBNORMAL: {

	284 // Convert double-to-half as the processor would, assuming that FPCR.FZ

	285 // (flush-to-zero) is not set.

	286

	287 // Add the implicit '1' bit to the mantissa.

	288 mantissa += (UINT64_C(1) << kDoubleMantissaBits);

	289 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);

	290 }

	291 }

	292

	293 UNREACHABLE();

	294 return kFP16DefaultNaN;

	295 }

	296

	297 float Simulator::FPToFloat(double value, FPRounding round_mode) {

	298 // Only the FPTieEven rounding mode is implemented.

	299 DCHECK((round_mode == FPTieEven) \|\| (round_mode == FPRoundOdd));

	300 USE(round_mode);

	301

	302 switch (std::fpclassify(value)) {

	303 case FP_NAN: {

	304 if (IsSignallingNaN(value)) {

	305 FPProcessException();

	306 }

	307 if (DN()) return kFP32DefaultNaN;

	308

	309 // Convert NaNs as the processor would:

	310 // - The sign is propagated.

	311 // - The mantissa is transferred as much as possible, except that the

	312 // top bit is forced to '1', making the result a quiet NaN.

	313

	314 uint64_t raw = bit_cast<uint64_t>(value);

	315

	316 uint32_t sign = raw >> 63;

	317 uint32_t exponent = (1 << 8) - 1;

	318 uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(

	319 50, kDoubleMantissaBits - kFloatMantissaBits, raw));

	320 mantissa \|= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.

	321

	322 return float_pack(sign, exponent, mantissa);

	323 }

	324

	325 case FP_ZERO:

	326 case FP_INFINITE: {

	327 // In a C++ cast, any value representable in the target type will be

	328 // unchanged. This is always the case for +/-0.0 and infinities.

	329 return static_cast<float>(value);

	330 }

	331

	332 case FP_NORMAL:

	333 case FP_SUBNORMAL: {

	334 // Convert double-to-float as the processor would, assuming that FPCR.FZ

	335 // (flush-to-zero) is not set.

	336 uint32_t sign = double_sign(value);

	337 int64_t exponent =

	338 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;

	339 uint64_t mantissa = double_mantissa(value);

	340 if (std::fpclassify(value) == FP_NORMAL) {

	341 // For normal FP values, add the hidden bit.

	342 mantissa \|= (UINT64_C(1) << kDoubleMantissaBits);

	343 }

	344 return FPRoundToFloat(sign, exponent, mantissa, round_mode);

	345 }

	346 }

	347

	348 UNREACHABLE();

	349 return kFP32DefaultNaN;

	350 }

	351

	352 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {

	353 dst.ClearForWrite(vform);

	354 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	355 dst.ReadUintFromMem(vform, i, addr);

	356 addr += LaneSizeInBytesFromFormat(vform);

	357 }

	358 }

	359

	360 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,

	361 uint64_t addr) {

	362 dst.ReadUintFromMem(vform, index, addr);

	363 }

	364

	365 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {

	366 dst.ClearForWrite(vform);

	367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	368 dst.ReadUintFromMem(vform, i, addr);

	369 }

	370 }

	371

	372 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,

	373 LogicVRegister dst2, uint64_t addr1) {

	374 dst1.ClearForWrite(vform);

	375 dst2.ClearForWrite(vform);

	376 int esize = LaneSizeInBytesFromFormat(vform);

	377 uint64_t addr2 = addr1 + esize;

	378 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	379 dst1.ReadUintFromMem(vform, i, addr1);

	380 dst2.ReadUintFromMem(vform, i, addr2);

	381 addr1 += 2 * esize;

	382 addr2 += 2 * esize;

	383 }

	384 }

	385

	386 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,

	387 LogicVRegister dst2, int index, uint64_t addr1) {

	388 dst1.ClearForWrite(vform);

	389 dst2.ClearForWrite(vform);

	390 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	391 dst1.ReadUintFromMem(vform, index, addr1);

	392 dst2.ReadUintFromMem(vform, index, addr2);

	393 }

	394

	395 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,

	396 LogicVRegister dst2, uint64_t addr) {

	397 dst1.ClearForWrite(vform);

	398 dst2.ClearForWrite(vform);

	399 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	400 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	401 dst1.ReadUintFromMem(vform, i, addr);

	402 dst2.ReadUintFromMem(vform, i, addr2);

	403 }

	404 }

	405

	406 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,

	407 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {

	408 dst1.ClearForWrite(vform);

	409 dst2.ClearForWrite(vform);

	410 dst3.ClearForWrite(vform);

	411 int esize = LaneSizeInBytesFromFormat(vform);

	412 uint64_t addr2 = addr1 + esize;

	413 uint64_t addr3 = addr2 + esize;

	414 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	415 dst1.ReadUintFromMem(vform, i, addr1);

	416 dst2.ReadUintFromMem(vform, i, addr2);

	417 dst3.ReadUintFromMem(vform, i, addr3);

	418 addr1 += 3 * esize;

	419 addr2 += 3 * esize;

	420 addr3 += 3 * esize;

	421 }

	422 }

	423

	424 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,

	425 LogicVRegister dst2, LogicVRegister dst3, int index,

	426 uint64_t addr1) {

	427 dst1.ClearForWrite(vform);

	428 dst2.ClearForWrite(vform);

	429 dst3.ClearForWrite(vform);

	430 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	431 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	432 dst1.ReadUintFromMem(vform, index, addr1);

	433 dst2.ReadUintFromMem(vform, index, addr2);

	434 dst3.ReadUintFromMem(vform, index, addr3);

	435 }

	436

	437 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,

	438 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {

	439 dst1.ClearForWrite(vform);

	440 dst2.ClearForWrite(vform);

	441 dst3.ClearForWrite(vform);

	442 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	443 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	444 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	445 dst1.ReadUintFromMem(vform, i, addr);

	446 dst2.ReadUintFromMem(vform, i, addr2);

	447 dst3.ReadUintFromMem(vform, i, addr3);

	448 }

	449 }

	450

	451 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,

	452 LogicVRegister dst2, LogicVRegister dst3,

	453 LogicVRegister dst4, uint64_t addr1) {

	454 dst1.ClearForWrite(vform);

	455 dst2.ClearForWrite(vform);

	456 dst3.ClearForWrite(vform);

	457 dst4.ClearForWrite(vform);

	458 int esize = LaneSizeInBytesFromFormat(vform);

	459 uint64_t addr2 = addr1 + esize;

	460 uint64_t addr3 = addr2 + esize;

	461 uint64_t addr4 = addr3 + esize;

	462 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	463 dst1.ReadUintFromMem(vform, i, addr1);

	464 dst2.ReadUintFromMem(vform, i, addr2);

	465 dst3.ReadUintFromMem(vform, i, addr3);

	466 dst4.ReadUintFromMem(vform, i, addr4);

	467 addr1 += 4 * esize;

	468 addr2 += 4 * esize;

	469 addr3 += 4 * esize;

	470 addr4 += 4 * esize;

	471 }

	472 }

	473

	474 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,

	475 LogicVRegister dst2, LogicVRegister dst3,

	476 LogicVRegister dst4, int index, uint64_t addr1) {

	477 dst1.ClearForWrite(vform);

	478 dst2.ClearForWrite(vform);

	479 dst3.ClearForWrite(vform);

	480 dst4.ClearForWrite(vform);

	481 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	482 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	483 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);

	484 dst1.ReadUintFromMem(vform, index, addr1);

	485 dst2.ReadUintFromMem(vform, index, addr2);

	486 dst3.ReadUintFromMem(vform, index, addr3);

	487 dst4.ReadUintFromMem(vform, index, addr4);

	488 }

	489

	490 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,

	491 LogicVRegister dst2, LogicVRegister dst3,

	492 LogicVRegister dst4, uint64_t addr) {

	493 dst1.ClearForWrite(vform);

	494 dst2.ClearForWrite(vform);

	495 dst3.ClearForWrite(vform);

	496 dst4.ClearForWrite(vform);

	497 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	498 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	499 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);

	500 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	501 dst1.ReadUintFromMem(vform, i, addr);

	502 dst2.ReadUintFromMem(vform, i, addr2);

	503 dst3.ReadUintFromMem(vform, i, addr3);

	504 dst4.ReadUintFromMem(vform, i, addr4);

	505 }

	506 }

	507

	508 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {

	509 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	510 src.WriteUintToMem(vform, i, addr);

	511 addr += LaneSizeInBytesFromFormat(vform);

	512 }

	513 }

	514

	515 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,

	516 uint64_t addr) {

	517 src.WriteUintToMem(vform, index, addr);

	518 }

	519

	520 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	521 uint64_t addr) {

	522 int esize = LaneSizeInBytesFromFormat(vform);

	523 uint64_t addr2 = addr + esize;

	524 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	525 dst.WriteUintToMem(vform, i, addr);

	526 dst2.WriteUintToMem(vform, i, addr2);

	527 addr += 2 * esize;

	528 addr2 += 2 * esize;

	529 }

	530 }

	531

	532 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	533 int index, uint64_t addr) {

	534 int esize = LaneSizeInBytesFromFormat(vform);

	535 dst.WriteUintToMem(vform, index, addr);

	536 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	537 }

	538

	539 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	540 LogicVRegister dst3, uint64_t addr) {

	541 int esize = LaneSizeInBytesFromFormat(vform);

	542 uint64_t addr2 = addr + esize;

	543 uint64_t addr3 = addr2 + esize;

	544 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	545 dst.WriteUintToMem(vform, i, addr);

	546 dst2.WriteUintToMem(vform, i, addr2);

	547 dst3.WriteUintToMem(vform, i, addr3);

	548 addr += 3 * esize;

	549 addr2 += 3 * esize;

	550 addr3 += 3 * esize;

	551 }

	552 }

	553

	554 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	555 LogicVRegister dst3, int index, uint64_t addr) {

	556 int esize = LaneSizeInBytesFromFormat(vform);

	557 dst.WriteUintToMem(vform, index, addr);

	558 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	559 dst3.WriteUintToMem(vform, index, addr + 2 * esize);

	560 }

	561

	562 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	563 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {

	564 int esize = LaneSizeInBytesFromFormat(vform);

	565 uint64_t addr2 = addr + esize;

	566 uint64_t addr3 = addr2 + esize;

	567 uint64_t addr4 = addr3 + esize;

	568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	569 dst.WriteUintToMem(vform, i, addr);

	570 dst2.WriteUintToMem(vform, i, addr2);

	571 dst3.WriteUintToMem(vform, i, addr3);

	572 dst4.WriteUintToMem(vform, i, addr4);

	573 addr += 4 * esize;

	574 addr2 += 4 * esize;

	575 addr3 += 4 * esize;

	576 addr4 += 4 * esize;

	577 }

	578 }

	579

	580 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	581 LogicVRegister dst3, LogicVRegister dst4, int index,

	582 uint64_t addr) {

	583 int esize = LaneSizeInBytesFromFormat(vform);

	584 dst.WriteUintToMem(vform, index, addr);

	585 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	586 dst3.WriteUintToMem(vform, index, addr + 2 * esize);

	587 dst4.WriteUintToMem(vform, index, addr + 3 * esize);

	588 }

	589

	590 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,

	591 const LogicVRegister& src1,

	592 const LogicVRegister& src2, Condition cond) {

	593 dst.ClearForWrite(vform);

	594 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	595 int64_t sa = src1.Int(vform, i);

	596 int64_t sb = src2.Int(vform, i);

	597 uint64_t ua = src1.Uint(vform, i);

	598 uint64_t ub = src2.Uint(vform, i);

	599 bool result = false;

	600 switch (cond) {

	601 case eq:

	602 result = (ua == ub);

	603 break;

	604 case ge:

	605 result = (sa >= sb);

	606 break;

	607 case gt:

	608 result = (sa > sb);

	609 break;

	610 case hi:

	611 result = (ua > ub);

	612 break;

	613 case hs:

	614 result = (ua >= ub);

	615 break;

	616 case lt:

	617 result = (sa < sb);

	618 break;

	619 case le:

	620 result = (sa <= sb);

	621 break;

	622 default:

	623 UNREACHABLE();

	624 break;

	625 }

	626 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);

	627 }

	628 return dst;

	629 }

	630

	631 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,

	632 const LogicVRegister& src1, int imm,

	633 Condition cond) {

	634 SimVRegister temp;

	635 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);

	636 return cmp(vform, dst, src1, imm_reg, cond);

	637 }

	638

	639 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,

	640 const LogicVRegister& src1,

	641 const LogicVRegister& src2) {

	642 dst.ClearForWrite(vform);

	643 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	644 uint64_t ua = src1.Uint(vform, i);

	645 uint64_t ub = src2.Uint(vform, i);

	646 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);

	647 }

	648 return dst;

	649 }

	650

	651 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,

	652 const LogicVRegister& src1,

	653 const LogicVRegister& src2) {

	654 int lane_size = LaneSizeInBitsFromFormat(vform);

	655 dst.ClearForWrite(vform);

	656 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	657 // Test for unsigned saturation.

	658 uint64_t ua = src1.UintLeftJustified(vform, i);

	659 uint64_t ub = src2.UintLeftJustified(vform, i);

	660 uint64_t ur = ua + ub;

	661 if (ur < ua) {

	662 dst.SetUnsignedSat(i, true);

	663 }

	664

	665 // Test for signed saturation.

	666 bool pos_a = (ua >> 63) == 0;

	667 bool pos_b = (ub >> 63) == 0;

	668 bool pos_r = (ur >> 63) == 0;

	669 // If the signs of the operands are the same, but different from the result,

	670 // there was an overflow.

	671 if ((pos_a == pos_b) && (pos_a != pos_r)) {

	672 dst.SetSignedSat(i, pos_a);

	673 }

	674

	675 dst.SetInt(vform, i, ur >> (64 - lane_size));

	676 }

	677 return dst;

	678 }

	679

	680 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,

	681 const LogicVRegister& src1,

	682 const LogicVRegister& src2) {

	683 SimVRegister temp1, temp2;

	684 uzp1(vform, temp1, src1, src2);

	685 uzp2(vform, temp2, src1, src2);

	686 add(vform, dst, temp1, temp2);

	687 return dst;

	688 }

	689

	690 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,

	691 const LogicVRegister& src1,

	692 const LogicVRegister& src2) {

	693 SimVRegister temp;

	694 mul(vform, temp, src1, src2);

	695 add(vform, dst, dst, temp);

	696 return dst;

	697 }

	698

	699 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,

	700 const LogicVRegister& src1,

	701 const LogicVRegister& src2) {

	702 SimVRegister temp;

	703 mul(vform, temp, src1, src2);

	704 sub(vform, dst, dst, temp);

	705 return dst;

	706 }

	707

	708 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,

	709 const LogicVRegister& src1,

	710 const LogicVRegister& src2) {

	711 dst.ClearForWrite(vform);

	712 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	713 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));

	714 }

	715 return dst;

	716 }

	717

	718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,

	719 const LogicVRegister& src1,

	720 const LogicVRegister& src2, int index) {

	721 SimVRegister temp;

	722 VectorFormat indexform = VectorFormatFillQ(vform);

	723 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));

	724 }

	725

	726 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,

	727 const LogicVRegister& src1,

	728 const LogicVRegister& src2, int index) {

	729 SimVRegister temp;

	730 VectorFormat indexform = VectorFormatFillQ(vform);

	731 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));

	732 }

	733

	734 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,

	735 const LogicVRegister& src1,

	736 const LogicVRegister& src2, int index) {

	737 SimVRegister temp;

	738 VectorFormat indexform = VectorFormatFillQ(vform);

	739 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));

	740 }

	741

	742 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,

	743 const LogicVRegister& src1,

	744 const LogicVRegister& src2, int index) {

	745 SimVRegister temp;

	746 VectorFormat indexform =

	747 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	748 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	749 }

	750

	751 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,

	752 const LogicVRegister& src1,

	753 const LogicVRegister& src2, int index) {

	754 SimVRegister temp;

	755 VectorFormat indexform =

	756 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	757 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	758 }

	759

	760 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,

	761 const LogicVRegister& src1,

	762 const LogicVRegister& src2, int index) {

	763 SimVRegister temp;

	764 VectorFormat indexform =

	765 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	766 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	767 }

	768

	769 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,

	770 const LogicVRegister& src1,

	771 const LogicVRegister& src2, int index) {

	772 SimVRegister temp;

	773 VectorFormat indexform =

	774 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	775 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	776 }

	777

	778 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,

	779 const LogicVRegister& src1,

	780 const LogicVRegister& src2, int index) {

	781 SimVRegister temp;

	782 VectorFormat indexform =

	783 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	784 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	785 }

	786

	787 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,

	788 const LogicVRegister& src1,

	789 const LogicVRegister& src2, int index) {

	790 SimVRegister temp;

	791 VectorFormat indexform =

	792 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	793 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	794 }

	795

	796 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,

	797 const LogicVRegister& src1,

	798 const LogicVRegister& src2, int index) {

	799 SimVRegister temp;

	800 VectorFormat indexform =

	801 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	802 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	803 }

	804

	805 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,

	806 const LogicVRegister& src1,

	807 const LogicVRegister& src2, int index) {

	808 SimVRegister temp;

	809 VectorFormat indexform =

	810 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	811 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	812 }

	813

	814 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,

	815 const LogicVRegister& src1,

	816 const LogicVRegister& src2, int index) {

	817 SimVRegister temp;

	818 VectorFormat indexform =

	819 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	820 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	821 }

	822

	823 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,

	824 const LogicVRegister& src1,

	825 const LogicVRegister& src2, int index) {

	826 SimVRegister temp;

	827 VectorFormat indexform =

	828 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	829 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	830 }

	831

	832 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,

	833 const LogicVRegister& src1,

	834 const LogicVRegister& src2, int index) {

	835 SimVRegister temp;

	836 VectorFormat indexform =

	837 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	838 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	839 }

	840

	841 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,

	842 const LogicVRegister& src1,

	843 const LogicVRegister& src2, int index) {

	844 SimVRegister temp;

	845 VectorFormat indexform =

	846 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	847 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	848 }

	849

	850 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,

	851 const LogicVRegister& src1,

	852 const LogicVRegister& src2, int index) {

	853 SimVRegister temp;

	854 VectorFormat indexform =

	855 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	856 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	857 }

	858

	859 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,

	860 const LogicVRegister& src1,

	861 const LogicVRegister& src2, int index) {

	862 SimVRegister temp;

	863 VectorFormat indexform =

	864 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	865 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	866 }

	867

	868 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,

	869 const LogicVRegister& src1,

	870 const LogicVRegister& src2, int index) {

	871 SimVRegister temp;

	872 VectorFormat indexform =

	873 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	874 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	875 }

	876

	877 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,

	878 const LogicVRegister& src1,

	879 const LogicVRegister& src2, int index) {

	880 SimVRegister temp;

	881 VectorFormat indexform =

	882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	883 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	884 }

	885

	886 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,

	887 const LogicVRegister& src1,

	888 const LogicVRegister& src2, int index) {

	889 SimVRegister temp;

	890 VectorFormat indexform =

	891 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	892 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	893 }

	894

	895 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,

	896 const LogicVRegister& src1,

	897 const LogicVRegister& src2, int index) {

	898 SimVRegister temp;

	899 VectorFormat indexform =

	900 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	901 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	902 }

	903

	904 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,

	905 const LogicVRegister& src1,

	906 const LogicVRegister& src2, int index) {

	907 SimVRegister temp;

	908 VectorFormat indexform = VectorFormatFillQ(vform);

	909 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));

	910 }

	911

	912 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,

	913 const LogicVRegister& src1,

	914 const LogicVRegister& src2, int index) {

	915 SimVRegister temp;

	916 VectorFormat indexform = VectorFormatFillQ(vform);

	917 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));

	918 }

	919

	920 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {

	921 uint16_t result = 0;

	922 uint16_t extended_op2 = op2;

	923 for (int i = 0; i < 8; ++i) {

	924 if ((op1 >> i) & 1) {

	925 result = result ^ (extended_op2 << i);

	926 }

	927 }

	928 return result;

	929 }

	930

	931 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,

	932 const LogicVRegister& src1,

	933 const LogicVRegister& src2) {

	934 dst.ClearForWrite(vform);

	935 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	936 dst.SetUint(vform, i,

	937 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));

	938 }

	939 return dst;

	940 }

	941

	942 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,

	943 const LogicVRegister& src1,

	944 const LogicVRegister& src2) {

	945 VectorFormat vform_src = VectorFormatHalfWidth(vform);

	946 dst.ClearForWrite(vform);

	947 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	948 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),

	949 src2.Uint(vform_src, i)));

	950 }

	951 return dst;

	952 }

	953

	954 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,

	955 const LogicVRegister& src1,

	956 const LogicVRegister& src2) {

	957 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);

	958 dst.ClearForWrite(vform);

	959 int lane_count = LaneCountFromFormat(vform);

	960 for (int i = 0; i < lane_count; i++) {

	961 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),

	962 src2.Uint(vform_src, lane_count + i)));

	963 }

	964 return dst;

	965 }

	966

	967 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,

	968 const LogicVRegister& src1,

	969 const LogicVRegister& src2) {

	970 int lane_size = LaneSizeInBitsFromFormat(vform);

	971 dst.ClearForWrite(vform);

	972 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	973 // Test for unsigned saturation.

	974 uint64_t ua = src1.UintLeftJustified(vform, i);

	975 uint64_t ub = src2.UintLeftJustified(vform, i);

	976 uint64_t ur = ua - ub;

	977 if (ub > ua) {

	978 dst.SetUnsignedSat(i, false);

	979 }

	980

	981 // Test for signed saturation.

	982 bool pos_a = (ua >> 63) == 0;

	983 bool pos_b = (ub >> 63) == 0;

	984 bool pos_r = (ur >> 63) == 0;

	985 // If the signs of the operands are different, and the sign of the first

	986 // operand doesn't match the result, there was an overflow.

	987 if ((pos_a != pos_b) && (pos_a != pos_r)) {

	988 dst.SetSignedSat(i, pos_a);

	989 }

	990

	991 dst.SetInt(vform, i, ur >> (64 - lane_size));

	992 }

	993 return dst;

	994 }

	995

	996 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,

	997 const LogicVRegister& src1,

	998 const LogicVRegister& src2) {

	999 dst.ClearForWrite(vform);

	1000 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1001 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));

	1002 }

	1003 return dst;

	1004 }

	1005

	1006 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,

	1007 const LogicVRegister& src1,

	1008 const LogicVRegister& src2) {

	1009 dst.ClearForWrite(vform);

	1010 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1011 dst.SetUint(vform, i, src1.Uint(vform, i) \| src2.Uint(vform, i));

	1012 }

	1013 return dst;

	1014 }

	1015

	1016 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,

	1017 const LogicVRegister& src1,

	1018 const LogicVRegister& src2) {

	1019 dst.ClearForWrite(vform);

	1020 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1021 dst.SetUint(vform, i, src1.Uint(vform, i) \| ~src2.Uint(vform, i));

	1022 }

	1023 return dst;

	1024 }

	1025

	1026 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,

	1027 const LogicVRegister& src1,

	1028 const LogicVRegister& src2) {

	1029 dst.ClearForWrite(vform);

	1030 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1031 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));

	1032 }

	1033 return dst;

	1034 }

	1035

	1036 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,

	1037 const LogicVRegister& src1,

	1038 const LogicVRegister& src2) {

	1039 dst.ClearForWrite(vform);

	1040 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1041 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));

	1042 }

	1043 return dst;

	1044 }

	1045

	1046 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,

	1047 const LogicVRegister& src, uint64_t imm) {

	1048 uint64_t result[16];

	1049 int laneCount = LaneCountFromFormat(vform);

	1050 for (int i = 0; i < laneCount; ++i) {

	1051 result[i] = src.Uint(vform, i) & ~imm;

	1052 }

	1053 dst.SetUintArray(vform, result);

	1054 return dst;

	1055 }

	1056

	1057 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,

	1058 const LogicVRegister& src1,

	1059 const LogicVRegister& src2) {

	1060 dst.ClearForWrite(vform);

	1061 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1062 uint64_t operand1 = dst.Uint(vform, i);

	1063 uint64_t operand2 = ~src2.Uint(vform, i);

	1064 uint64_t operand3 = src1.Uint(vform, i);

	1065 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1066 dst.SetUint(vform, i, result);

	1067 }

	1068 return dst;

	1069 }

	1070

	1071 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,

	1072 const LogicVRegister& src1,

	1073 const LogicVRegister& src2) {

	1074 dst.ClearForWrite(vform);

	1075 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1076 uint64_t operand1 = dst.Uint(vform, i);

	1077 uint64_t operand2 = src2.Uint(vform, i);

	1078 uint64_t operand3 = src1.Uint(vform, i);

	1079 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1080 dst.SetUint(vform, i, result);

	1081 }

	1082 return dst;

	1083 }

	1084

	1085 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,

	1086 const LogicVRegister& src1,

	1087 const LogicVRegister& src2) {

	1088 dst.ClearForWrite(vform);

	1089 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1090 uint64_t operand1 = src2.Uint(vform, i);

	1091 uint64_t operand2 = dst.Uint(vform, i);

	1092 uint64_t operand3 = src1.Uint(vform, i);

	1093 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1094 dst.SetUint(vform, i, result);

	1095 }

	1096 return dst;

	1097 }

	1098

	1099 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,

	1100 const LogicVRegister& src1,

	1101 const LogicVRegister& src2, bool max) {

	1102 dst.ClearForWrite(vform);

	1103 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1104 int64_t src1_val = src1.Int(vform, i);

	1105 int64_t src2_val = src2.Int(vform, i);

	1106 int64_t dst_val;

	1107 if (max) {

	1108 dst_val = (src1_val > src2_val) ? src1_val : src2_val;

	1109 } else {

	1110 dst_val = (src1_val < src2_val) ? src1_val : src2_val;

	1111 }

	1112 dst.SetInt(vform, i, dst_val);

	1113 }

	1114 return dst;

	1115 }

	1116

	1117 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,

	1118 const LogicVRegister& src1,

	1119 const LogicVRegister& src2) {

	1120 return SMinMax(vform, dst, src1, src2, true);

	1121 }

	1122

	1123 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,

	1124 const LogicVRegister& src1,

	1125 const LogicVRegister& src2) {

	1126 return SMinMax(vform, dst, src1, src2, false);

	1127 }

	1128

	1129 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,

	1130 const LogicVRegister& src1,

	1131 const LogicVRegister& src2, bool max) {

	1132 int lanes = LaneCountFromFormat(vform);

	1133 int64_t result[kMaxLanesPerVector];

	1134 const LogicVRegister* src = &src1;

	1135 for (int j = 0; j < 2; j++) {

	1136 for (int i = 0; i < lanes; i += 2) {

	1137 int64_t first_val = src->Int(vform, i);

	1138 int64_t second_val = src->Int(vform, i + 1);

	1139 int64_t dst_val;

	1140 if (max) {

	1141 dst_val = (first_val > second_val) ? first_val : second_val;

	1142 } else {

	1143 dst_val = (first_val < second_val) ? first_val : second_val;

	1144 }

	1145 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);

	1146 result[(i >> 1) + (j * lanes / 2)] = dst_val;

	1147 }

	1148 src = &src2;

	1149 }

	1150 dst.SetIntArray(vform, result);

	1151 return dst;

	1152 }

	1153

	1154 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,

	1155 const LogicVRegister& src1,

	1156 const LogicVRegister& src2) {

	1157 return SMinMaxP(vform, dst, src1, src2, true);

	1158 }

	1159

	1160 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,

	1161 const LogicVRegister& src1,

	1162 const LogicVRegister& src2) {

	1163 return SMinMaxP(vform, dst, src1, src2, false);

	1164 }

	1165

	1166 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,

	1167 const LogicVRegister& src) {

	1168 DCHECK_EQ(vform, kFormatD);

	1169

	1170 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);

	1171 dst.ClearForWrite(vform);

	1172 dst.SetUint(vform, 0, dst_val);

	1173 return dst;

	1174 }

	1175

	1176 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,

	1177 const LogicVRegister& src) {

	1178 VectorFormat vform_dst =

	1179 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));

	1180

	1181 int64_t dst_val = 0;

	1182 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1183 dst_val += src.Int(vform, i);

	1184 }

	1185

	1186 dst.ClearForWrite(vform_dst);

	1187 dst.SetInt(vform_dst, 0, dst_val);

	1188 return dst;

	1189 }

	1190

	1191 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,

	1192 const LogicVRegister& src) {

	1193 VectorFormat vform_dst =

	1194 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

	1195

	1196 int64_t dst_val = 0;

	1197 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1198 dst_val += src.Int(vform, i);

	1199 }

	1200

	1201 dst.ClearForWrite(vform_dst);

	1202 dst.SetInt(vform_dst, 0, dst_val);

	1203 return dst;

	1204 }

	1205

	1206 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,

	1207 const LogicVRegister& src) {

	1208 VectorFormat vform_dst =

	1209 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

	1210

	1211 uint64_t dst_val = 0;

	1212 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1213 dst_val += src.Uint(vform, i);

	1214 }

	1215

	1216 dst.ClearForWrite(vform_dst);

	1217 dst.SetUint(vform_dst, 0, dst_val);

	1218 return dst;

	1219 }

	1220

	1221 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,

	1222 const LogicVRegister& src, bool max) {

	1223 int64_t dst_val = max ? INT64_MIN : INT64_MAX;

	1224 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1225 int64_t src_val = src.Int(vform, i);

	1226 if (max) {

	1227 dst_val = (src_val > dst_val) ? src_val : dst_val;

	1228 } else {

	1229 dst_val = (src_val < dst_val) ? src_val : dst_val;

	1230 }

	1231 }

	1232 dst.ClearForWrite(ScalarFormatFromFormat(vform));

	1233 dst.SetInt(vform, 0, dst_val);

	1234 return dst;

	1235 }

	1236

	1237 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,

	1238 const LogicVRegister& src) {

	1239 SMinMaxV(vform, dst, src, true);

	1240 return dst;

	1241 }

	1242

	1243 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,

	1244 const LogicVRegister& src) {

	1245 SMinMaxV(vform, dst, src, false);

	1246 return dst;

	1247 }

	1248

	1249 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,

	1250 const LogicVRegister& src1,

	1251 const LogicVRegister& src2, bool max) {

	1252 dst.ClearForWrite(vform);

	1253 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1254 uint64_t src1_val = src1.Uint(vform, i);

	1255 uint64_t src2_val = src2.Uint(vform, i);

	1256 uint64_t dst_val;

	1257 if (max) {

	1258 dst_val = (src1_val > src2_val) ? src1_val : src2_val;

	1259 } else {

	1260 dst_val = (src1_val < src2_val) ? src1_val : src2_val;

	1261 }

	1262 dst.SetUint(vform, i, dst_val);

	1263 }

	1264 return dst;

	1265 }

	1266

	1267 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,

	1268 const LogicVRegister& src1,

	1269 const LogicVRegister& src2) {

	1270 return UMinMax(vform, dst, src1, src2, true);

	1271 }

	1272

	1273 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,

	1274 const LogicVRegister& src1,

	1275 const LogicVRegister& src2) {

	1276 return UMinMax(vform, dst, src1, src2, false);

	1277 }

	1278

	1279 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,

	1280 const LogicVRegister& src1,

	1281 const LogicVRegister& src2, bool max) {

	1282 int lanes = LaneCountFromFormat(vform);

	1283 uint64_t result[kMaxLanesPerVector];

	1284 const LogicVRegister* src = &src1;

	1285 for (int j = 0; j < 2; j++) {

	1286 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {

	1287 uint64_t first_val = src->Uint(vform, i);

	1288 uint64_t second_val = src->Uint(vform, i + 1);

	1289 uint64_t dst_val;

	1290 if (max) {

	1291 dst_val = (first_val > second_val) ? first_val : second_val;

	1292 } else {

	1293 dst_val = (first_val < second_val) ? first_val : second_val;

	1294 }

	1295 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);

	1296 result[(i >> 1) + (j * lanes / 2)] = dst_val;

	1297 }

	1298 src = &src2;

	1299 }

	1300 dst.SetUintArray(vform, result);

	1301 return dst;

	1302 }

	1303

	1304 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,

	1305 const LogicVRegister& src1,

	1306 const LogicVRegister& src2) {

	1307 return UMinMaxP(vform, dst, src1, src2, true);

	1308 }

	1309

	1310 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,

	1311 const LogicVRegister& src1,

	1312 const LogicVRegister& src2) {

	1313 return UMinMaxP(vform, dst, src1, src2, false);

	1314 }

	1315

	1316 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,

	1317 const LogicVRegister& src, bool max) {

	1318 uint64_t dst_val = max ? 0 : UINT64_MAX;

	1319 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1320 uint64_t src_val = src.Uint(vform, i);

	1321 if (max) {

	1322 dst_val = (src_val > dst_val) ? src_val : dst_val;

	1323 } else {

	1324 dst_val = (src_val < dst_val) ? src_val : dst_val;

	1325 }

	1326 }

	1327 dst.ClearForWrite(ScalarFormatFromFormat(vform));

	1328 dst.SetUint(vform, 0, dst_val);

	1329 return dst;

	1330 }

	1331

	1332 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,

	1333 const LogicVRegister& src) {

	1334 UMinMaxV(vform, dst, src, true);

	1335 return dst;

	1336 }

	1337

	1338 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,

	1339 const LogicVRegister& src) {

	1340 UMinMaxV(vform, dst, src, false);

	1341 return dst;

	1342 }

	1343

	1344 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,

	1345 const LogicVRegister& src, int shift) {

	1346 DCHECK_GE(shift, 0);

	1347 SimVRegister temp;

	1348 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1349 return ushl(vform, dst, src, shiftreg);

	1350 }

	1351

	1352 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,

	1353 const LogicVRegister& src, int shift) {

	1354 DCHECK_GE(shift, 0);

	1355 SimVRegister temp1, temp2;

	1356 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1357 LogicVRegister extendedreg = sxtl(vform, temp2, src);

	1358 return sshl(vform, dst, extendedreg, shiftreg);

	1359 }

	1360

	1361 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,

	1362 const LogicVRegister& src, int shift) {

	1363 DCHECK_GE(shift, 0);

	1364 SimVRegister temp1, temp2;

	1365 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1366 LogicVRegister extendedreg = sxtl2(vform, temp2, src);

	1367 return sshl(vform, dst, extendedreg, shiftreg);

	1368 }

	1369

	1370 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,

	1371 const LogicVRegister& src) {

	1372 int shift = LaneSizeInBitsFromFormat(vform) / 2;

	1373 return sshll(vform, dst, src, shift);

	1374 }

	1375

	1376 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,

	1377 const LogicVRegister& src) {

	1378 int shift = LaneSizeInBitsFromFormat(vform) / 2;

	1379 return sshll2(vform, dst, src, shift);

	1380 }

	1381

	1382 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,

	1383 const LogicVRegister& src, int shift) {

	1384 DCHECK_GE(shift, 0);

	1385 SimVRegister temp1, temp2;

	1386 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1387 LogicVRegister extendedreg = uxtl(vform, temp2, src);

	1388 return ushl(vform, dst, extendedreg, shiftreg);

	1389 }

	1390

	1391 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,

	1392 const LogicVRegister& src, int shift) {

	1393 DCHECK_GE(shift, 0);

	1394 SimVRegister temp1, temp2;

	1395 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1396 LogicVRegister extendedreg = uxtl2(vform, temp2, src);

	1397 return ushl(vform, dst, extendedreg, shiftreg);

	1398 }

	1399

	1400 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,

	1401 const LogicVRegister& src, int shift) {

	1402 dst.ClearForWrite(vform);

	1403 int laneCount = LaneCountFromFormat(vform);

	1404 for (int i = 0; i < laneCount; i++) {

	1405 uint64_t src_lane = src.Uint(vform, i);

	1406 uint64_t dst_lane = dst.Uint(vform, i);

	1407 uint64_t shifted = src_lane << shift;

	1408 uint64_t mask = MaxUintFromFormat(vform) << shift;

	1409 dst.SetUint(vform, i, (dst_lane & ~mask) \| shifted);

	1410 }

	1411 return dst;

	1412 }

	1413

	1414 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,

	1415 const LogicVRegister& src, int shift) {

	1416 DCHECK_GE(shift, 0);

	1417 SimVRegister temp;

	1418 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1419 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);

	1420 }

	1421

	1422 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,

	1423 const LogicVRegister& src, int shift) {

	1424 DCHECK_GE(shift, 0);

	1425 SimVRegister temp;

	1426 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1427 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);

	1428 }

	1429

	1430 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,

	1431 const LogicVRegister& src, int shift) {

	1432 DCHECK_GE(shift, 0);

	1433 SimVRegister temp;

	1434 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1435 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);

	1436 }

	1437

	1438 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,

	1439 const LogicVRegister& src, int shift) {

	1440 dst.ClearForWrite(vform);

	1441 int laneCount = LaneCountFromFormat(vform);

	1442 DCHECK((shift > 0) &&

	1443 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));

	1444 for (int i = 0; i < laneCount; i++) {

	1445 uint64_t src_lane = src.Uint(vform, i);

	1446 uint64_t dst_lane = dst.Uint(vform, i);

	1447 uint64_t shifted;

	1448 uint64_t mask;

	1449 if (shift == 64) {

	1450 shifted = 0;

	1451 mask = 0;

	1452 } else {

	1453 shifted = src_lane >> shift;

	1454 mask = MaxUintFromFormat(vform) >> shift;

	1455 }

	1456 dst.SetUint(vform, i, (dst_lane & ~mask) \| shifted);

	1457 }

	1458 return dst;

	1459 }

	1460

	1461 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,

	1462 const LogicVRegister& src, int shift) {

	1463 DCHECK_GE(shift, 0);

	1464 SimVRegister temp;

	1465 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);

	1466 return ushl(vform, dst, src, shiftreg);

	1467 }

	1468

	1469 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,

	1470 const LogicVRegister& src, int shift) {

	1471 DCHECK_GE(shift, 0);

	1472 SimVRegister temp;

	1473 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);

	1474 return sshl(vform, dst, src, shiftreg);

	1475 }

	1476

	1477 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,

	1478 const LogicVRegister& src, int shift) {

	1479 SimVRegister temp;

	1480 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);

	1481 return add(vform, dst, dst, shifted_reg);

	1482 }

	1483

	1484 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,

	1485 const LogicVRegister& src, int shift) {

	1486 SimVRegister temp;

	1487 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);

	1488 return add(vform, dst, dst, shifted_reg);

	1489 }

	1490

	1491 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,

	1492 const LogicVRegister& src, int shift) {

	1493 SimVRegister temp;

	1494 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);

	1495 return add(vform, dst, dst, shifted_reg);

	1496 }

	1497

	1498 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,

	1499 const LogicVRegister& src, int shift) {

	1500 SimVRegister temp;

	1501 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);

	1502 return add(vform, dst, dst, shifted_reg);

	1503 }

	1504

	1505 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,

	1506 const LogicVRegister& src) {

	1507 uint64_t result[16];

	1508 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1509 int laneCount = LaneCountFromFormat(vform);

	1510 for (int i = 0; i < laneCount; i++) {

	1511 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);

	1512 }

	1513

	1514 dst.SetUintArray(vform, result);

	1515 return dst;

	1516 }

	1517

	1518 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,

	1519 const LogicVRegister& src) {

	1520 uint64_t result[16];

	1521 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1522 int laneCount = LaneCountFromFormat(vform);

	1523 for (int i = 0; i < laneCount; i++) {

	1524 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);

	1525 }

	1526

	1527 dst.SetUintArray(vform, result);

	1528 return dst;

	1529 }

	1530

	1531 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,

	1532 const LogicVRegister& src) {

	1533 uint64_t result[16];

	1534 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1535 int laneCount = LaneCountFromFormat(vform);

	1536 for (int i = 0; i < laneCount; i++) {

	1537 uint64_t value = src.Uint(vform, i);

	1538 result[i] = 0;

	1539 for (int j = 0; j < laneSizeInBits; j++) {

	1540 result[i] += (value & 1);

	1541 value >>= 1;

	1542 }

	1543 }

	1544

	1545 dst.SetUintArray(vform, result);

	1546 return dst;

	1547 }

	1548

	1549 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,

	1550 const LogicVRegister& src1,

	1551 const LogicVRegister& src2) {

	1552 dst.ClearForWrite(vform);

	1553 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1554 int8_t shift_val = src2.Int(vform, i);

	1555 int64_t lj_src_val = src1.IntLeftJustified(vform, i);

	1556

	1557 // Set signed saturation state.

	1558 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&

	1559 (lj_src_val != 0)) {

	1560 dst.SetSignedSat(i, lj_src_val >= 0);

	1561 }

	1562

	1563 // Set unsigned saturation state.

	1564 if (lj_src_val < 0) {

	1565 dst.SetUnsignedSat(i, false);

	1566 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&

	1567 (lj_src_val != 0)) {

	1568 dst.SetUnsignedSat(i, true);

	1569 }

	1570

	1571 int64_t src_val = src1.Int(vform, i);

	1572 bool src_is_negative = src_val < 0;

	1573 if (shift_val > 63) {

	1574 dst.SetInt(vform, i, 0);

	1575 } else if (shift_val < -63) {

	1576 dst.SetRounding(i, src_is_negative);

	1577 dst.SetInt(vform, i, src_is_negative ? -1 : 0);

	1578 } else {

	1579 // Use unsigned types for shifts, as behaviour is undefined for signed

	1580 // lhs.

	1581 uint64_t usrc_val = static_cast<uint64_t>(src_val);

	1582

	1583 if (shift_val < 0) {

	1584 // Convert to right shift.

	1585 shift_val = -shift_val;

	1586

	1587 // Set rounding state by testing most-significant bit shifted out.

	1588 // Rounding only needed on right shifts.

	1589 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {

	1590 dst.SetRounding(i, true);

	1591 }

	1592

	1593 usrc_val >>= shift_val;

	1594

	1595 if (src_is_negative) {

	1596 // Simulate sign-extension.

	1597 usrc_val \|= (~UINT64_C(0) << (64 - shift_val));

	1598 }

	1599 } else {

	1600 usrc_val <<= shift_val;

	1601 }

	1602 dst.SetUint(vform, i, usrc_val);

	1603 }

	1604 }

	1605 return dst;

	1606 }

	1607

	1608 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,

	1609 const LogicVRegister& src1,

	1610 const LogicVRegister& src2) {

	1611 dst.ClearForWrite(vform);

	1612 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1613 int8_t shift_val = src2.Int(vform, i);

	1614 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);

	1615

	1616 // Set saturation state.

	1617 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {

	1618 dst.SetUnsignedSat(i, true);

	1619 }

	1620

	1621 uint64_t src_val = src1.Uint(vform, i);

	1622 if ((shift_val > 63) \|\| (shift_val < -64)) {

	1623 dst.SetUint(vform, i, 0);

	1624 } else {

	1625 if (shift_val < 0) {

	1626 // Set rounding state. Rounding only needed on right shifts.

	1627 if (((src_val >> (-shift_val - 1)) & 1) == 1) {

	1628 dst.SetRounding(i, true);

	1629 }

	1630

	1631 if (shift_val == -64) {

	1632 src_val = 0;

	1633 } else {

	1634 src_val >>= -shift_val;

	1635 }

	1636 } else {

	1637 src_val <<= shift_val;

	1638 }

	1639 dst.SetUint(vform, i, src_val);

	1640 }

	1641 }

	1642 return dst;

	1643 }

	1644

	1645 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,

	1646 const LogicVRegister& src) {

	1647 dst.ClearForWrite(vform);

	1648 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1649 // Test for signed saturation.

	1650 int64_t sa = src.Int(vform, i);

	1651 if (sa == MinIntFromFormat(vform)) {

	1652 dst.SetSignedSat(i, true);

	1653 }

	1654 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);

	1655 }

	1656 return dst;

	1657 }

	1658

	1659 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,

	1660 const LogicVRegister& src) {

	1661 dst.ClearForWrite(vform);

	1662 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1663 int64_t sa = dst.IntLeftJustified(vform, i);

	1664 uint64_t ub = src.UintLeftJustified(vform, i);

	1665 uint64_t ur = sa + ub;

	1666

	1667 int64_t sr = bit_cast<int64_t>(ur);

	1668 if (sr < sa) { // Test for signed positive saturation.

	1669 dst.SetInt(vform, i, MaxIntFromFormat(vform));

	1670 } else {

	1671 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));

	1672 }

	1673 }

	1674 return dst;

	1675 }

	1676

	1677 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,

	1678 const LogicVRegister& src) {

	1679 dst.ClearForWrite(vform);

	1680 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1681 uint64_t ua = dst.UintLeftJustified(vform, i);

	1682 int64_t sb = src.IntLeftJustified(vform, i);

	1683 uint64_t ur = ua + sb;

	1684

	1685 if ((sb > 0) && (ur <= ua)) {

	1686 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.

	1687 } else if ((sb < 0) && (ur >= ua)) {

	1688 dst.SetUint(vform, i, 0); // Negative saturation.

	1689 } else {

	1690 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));

	1691 }

	1692 }

	1693 return dst;

	1694 }

	1695

	1696 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,

	1697 const LogicVRegister& src) {

	1698 dst.ClearForWrite(vform);

	1699 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1700 // Test for signed saturation.

	1701 int64_t sa = src.Int(vform, i);

	1702 if (sa == MinIntFromFormat(vform)) {

	1703 dst.SetSignedSat(i, true);

	1704 }

	1705 if (sa < 0) {

	1706 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);

	1707 } else {

	1708 dst.SetInt(vform, i, sa);

	1709 }

	1710 }

	1711 return dst;

	1712 }

	1713

	1714 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,

	1715 LogicVRegister dst, bool dstIsSigned,

	1716 const LogicVRegister& src,

	1717 bool srcIsSigned) {

	1718 bool upperhalf = false;

	1719 VectorFormat srcform = kFormatUndefined;

	1720 int64_t ssrc[8];

	1721 uint64_t usrc[8];

	1722

	1723 switch (dstform) {

	1724 case kFormat8B:

	1725 upperhalf = false;

	1726 srcform = kFormat8H;

	1727 break;

	1728 case kFormat16B:

	1729 upperhalf = true;

	1730 srcform = kFormat8H;

	1731 break;

	1732 case kFormat4H:

	1733 upperhalf = false;

	1734 srcform = kFormat4S;

	1735 break;

	1736 case kFormat8H:

	1737 upperhalf = true;

	1738 srcform = kFormat4S;

	1739 break;

	1740 case kFormat2S:

	1741 upperhalf = false;

	1742 srcform = kFormat2D;

	1743 break;

	1744 case kFormat4S:

	1745 upperhalf = true;

	1746 srcform = kFormat2D;

	1747 break;

	1748 case kFormatB:

	1749 upperhalf = false;

	1750 srcform = kFormatH;

	1751 break;

	1752 case kFormatH:

	1753 upperhalf = false;

	1754 srcform = kFormatS;

	1755 break;

	1756 case kFormatS:

	1757 upperhalf = false;

	1758 srcform = kFormatD;

	1759 break;

	1760 default:

	1761 UNIMPLEMENTED();

	1762 }

	1763

	1764 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {

	1765 ssrc[i] = src.Int(srcform, i);

	1766 usrc[i] = src.Uint(srcform, i);

	1767 }

	1768

	1769 int offset;

	1770 if (upperhalf) {

	1771 offset = LaneCountFromFormat(dstform) / 2;

	1772 } else {

	1773 offset = 0;

	1774 dst.ClearForWrite(dstform);

	1775 }

	1776

	1777 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {

	1778 // Test for signed saturation

	1779 if (ssrc[i] > MaxIntFromFormat(dstform)) {

	1780 dst.SetSignedSat(offset + i, true);

	1781 } else if (ssrc[i] < MinIntFromFormat(dstform)) {

	1782 dst.SetSignedSat(offset + i, false);

	1783 }

	1784

	1785 // Test for unsigned saturation

	1786 if (srcIsSigned) {

	1787 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {

	1788 dst.SetUnsignedSat(offset + i, true);

	1789 } else if (ssrc[i] < 0) {

	1790 dst.SetUnsignedSat(offset + i, false);

	1791 }

	1792 } else {

	1793 if (usrc[i] > MaxUintFromFormat(dstform)) {

	1794 dst.SetUnsignedSat(offset + i, true);

	1795 }

	1796 }

	1797

	1798 int64_t result;

	1799 if (srcIsSigned) {

	1800 result = ssrc[i] & MaxUintFromFormat(dstform);

	1801 } else {

	1802 result = usrc[i] & MaxUintFromFormat(dstform);

	1803 }

	1804

	1805 if (dstIsSigned) {

	1806 dst.SetInt(dstform, offset + i, result);

	1807 } else {

	1808 dst.SetUint(dstform, offset + i, result);

	1809 }

	1810 }

	1811 return dst;

	1812 }

	1813

	1814 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,

	1815 const LogicVRegister& src) {

	1816 return ExtractNarrow(vform, dst, true, src, true);

	1817 }

	1818

	1819 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,

	1820 const LogicVRegister& src) {

	1821 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);

	1822 }

	1823

	1824 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,

	1825 const LogicVRegister& src) {

	1826 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);

	1827 }

	1828

	1829 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,

	1830 const LogicVRegister& src) {

	1831 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);

	1832 }

	1833

	1834 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,

	1835 const LogicVRegister& src1,

	1836 const LogicVRegister& src2, bool issigned) {

	1837 dst.ClearForWrite(vform);

	1838 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1839 if (issigned) {

	1840 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);

	1841 sr = sr > 0 ? sr : -sr;

	1842 dst.SetInt(vform, i, sr);

	1843 } else {

	1844 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);

	1845 sr = sr > 0 ? sr : -sr;

	1846 dst.SetUint(vform, i, sr);

	1847 }

	1848 }

	1849 return dst;

	1850 }

	1851

	1852 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,

	1853 const LogicVRegister& src1,

	1854 const LogicVRegister& src2) {

	1855 SimVRegister temp;

	1856 dst.ClearForWrite(vform);

	1857 AbsDiff(vform, temp, src1, src2, true);

	1858 add(vform, dst, dst, temp);

	1859 return dst;

	1860 }

	1861

	1862 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,

	1863 const LogicVRegister& src1,

	1864 const LogicVRegister& src2) {

	1865 SimVRegister temp;

	1866 dst.ClearForWrite(vform);

	1867 AbsDiff(vform, temp, src1, src2, false);

	1868 add(vform, dst, dst, temp);

	1869 return dst;

	1870 }

	1871

	1872 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,

	1873 const LogicVRegister& src) {

	1874 dst.ClearForWrite(vform);

	1875 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1876 dst.SetUint(vform, i, ~src.Uint(vform, i));

	1877 }

	1878 return dst;

	1879 }

	1880

	1881 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,

	1882 const LogicVRegister& src) {

	1883 uint64_t result[16];

	1884 int laneCount = LaneCountFromFormat(vform);

	1885 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1886 uint64_t reversed_value;

	1887 uint64_t value;

	1888 for (int i = 0; i < laneCount; i++) {

	1889 value = src.Uint(vform, i);

	1890 reversed_value = 0;

	1891 for (int j = 0; j < laneSizeInBits; j++) {

	1892 reversed_value = (reversed_value << 1) \| (value & 1);

	1893 value >>= 1;

	1894 }

	1895 result[i] = reversed_value;

	1896 }

	1897

	1898 dst.SetUintArray(vform, result);

	1899 return dst;

	1900 }

	1901

	1902 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,

	1903 const LogicVRegister& src, int revSize) {

	1904 uint64_t result[16];

	1905 int laneCount = LaneCountFromFormat(vform);

	1906 int laneSize = LaneSizeInBytesFromFormat(vform);

	1907 int lanesPerLoop = revSize / laneSize;

	1908 for (int i = 0; i < laneCount; i += lanesPerLoop) {

	1909 for (int j = 0; j < lanesPerLoop; j++) {

	1910 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);

	1911 }

	1912 }

	1913 dst.SetUintArray(vform, result);

	1914 return dst;

	1915 }

	1916

	1917 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,

	1918 const LogicVRegister& src) {

	1919 return rev(vform, dst, src, 2);

	1920 }

	1921

	1922 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,

	1923 const LogicVRegister& src) {

	1924 return rev(vform, dst, src, 4);

	1925 }

	1926

	1927 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,

	1928 const LogicVRegister& src) {

	1929 return rev(vform, dst, src, 8);

	1930 }

	1931

	1932 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,

	1933 const LogicVRegister& src, bool is_signed,

	1934 bool do_accumulate) {

	1935 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);

	1936 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);

	1937 DCHECK_LE(LaneCountFromFormat(vform), 8);

	1938

	1939 uint64_t result[8];

	1940 int lane_count = LaneCountFromFormat(vform);

	1941 for (int i = 0; i < lane_count; i++) {

	1942 if (is_signed) {

	1943 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +

	1944 src.Int(vformsrc, 2 * i + 1));

	1945 } else {

	1946 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);

	1947 }

	1948 }

	1949

	1950 dst.ClearForWrite(vform);

	1951 for (int i = 0; i < lane_count; ++i) {

	1952 if (do_accumulate) {

	1953 result[i] += dst.Uint(vform, i);

	1954 }

	1955 dst.SetUint(vform, i, result[i]);

	1956 }

	1957

	1958 return dst;

	1959 }

	1960

	1961 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,

	1962 const LogicVRegister& src) {

	1963 return addlp(vform, dst, src, true, false);

	1964 }

	1965

	1966 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,

	1967 const LogicVRegister& src) {

	1968 return addlp(vform, dst, src, false, false);

	1969 }

	1970

	1971 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,

	1972 const LogicVRegister& src) {

	1973 return addlp(vform, dst, src, true, true);

	1974 }

	1975

	1976 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,

	1977 const LogicVRegister& src) {

	1978 return addlp(vform, dst, src, false, true);

	1979 }

	1980

	1981 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,

	1982 const LogicVRegister& src1,

	1983 const LogicVRegister& src2, int index) {

	1984 uint8_t result[16];

	1985 int laneCount = LaneCountFromFormat(vform);

	1986 for (int i = 0; i < laneCount - index; ++i) {

	1987 result[i] = src1.Uint(vform, i + index);

	1988 }

	1989 for (int i = 0; i < index; ++i) {

	1990 result[laneCount - index + i] = src2.Uint(vform, i);

	1991 }

	1992 dst.ClearForWrite(vform);

	1993 for (int i = 0; i < laneCount; ++i) {

	1994 dst.SetUint(vform, i, result[i]);

	1995 }

	1996 return dst;

	1997 }

	1998

	1999 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,

	2000 const LogicVRegister& src,

	2001 int src_index) {

	2002 int laneCount = LaneCountFromFormat(vform);

	2003 uint64_t value = src.Uint(vform, src_index);

	2004 dst.ClearForWrite(vform);

	2005 for (int i = 0; i < laneCount; ++i) {

	2006 dst.SetUint(vform, i, value);

	2007 }

	2008 return dst;

	2009 }

	2010

	2011 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,

	2012 uint64_t imm) {

	2013 int laneCount = LaneCountFromFormat(vform);

	2014 uint64_t value = imm & MaxUintFromFormat(vform);

	2015 dst.ClearForWrite(vform);

	2016 for (int i = 0; i < laneCount; ++i) {

	2017 dst.SetUint(vform, i, value);

	2018 }

	2019 return dst;

	2020 }

	2021

	2022 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,

	2023 int dst_index, const LogicVRegister& src,

	2024 int src_index) {

	2025 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));

	2026 return dst;

	2027 }

	2028

	2029 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,

	2030 int dst_index, uint64_t imm) {

	2031 uint64_t value = imm & MaxUintFromFormat(vform);

	2032 dst.SetUint(vform, dst_index, value);

	2033 return dst;

	2034 }

	2035

	2036 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,

	2037 uint64_t imm) {

	2038 int laneCount = LaneCountFromFormat(vform);

	2039 dst.ClearForWrite(vform);

	2040 for (int i = 0; i < laneCount; ++i) {

	2041 dst.SetUint(vform, i, imm);

	2042 }

	2043 return dst;

	2044 }

	2045

	2046 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,

	2047 uint64_t imm) {

	2048 int laneCount = LaneCountFromFormat(vform);

	2049 dst.ClearForWrite(vform);

	2050 for (int i = 0; i < laneCount; ++i) {

	2051 dst.SetUint(vform, i, ~imm);

	2052 }

	2053 return dst;

	2054 }

	2055

	2056 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,

	2057 const LogicVRegister& src, uint64_t imm) {

	2058 uint64_t result[16];

	2059 int laneCount = LaneCountFromFormat(vform);

	2060 for (int i = 0; i < laneCount; ++i) {

	2061 result[i] = src.Uint(vform, i) \| imm;

	2062 }

	2063 dst.SetUintArray(vform, result);

	2064 return dst;

	2065 }

	2066

	2067 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,

	2068 const LogicVRegister& src) {

	2069 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2070

	2071 dst.ClearForWrite(vform);

	2072 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2073 dst.SetUint(vform, i, src.Uint(vform_half, i));

	2074 }

	2075 return dst;

	2076 }

	2077

	2078 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,

	2079 const LogicVRegister& src) {

	2080 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2081

	2082 dst.ClearForWrite(vform);

	2083 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2084 dst.SetInt(vform, i, src.Int(vform_half, i));

	2085 }

	2086 return dst;

	2087 }

	2088

	2089 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,

	2090 const LogicVRegister& src) {

	2091 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2092 int lane_count = LaneCountFromFormat(vform);

	2093

	2094 dst.ClearForWrite(vform);

	2095 for (int i = 0; i < lane_count; i++) {

	2096 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));

	2097 }

	2098 return dst;

	2099 }

	2100

	2101 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,

	2102 const LogicVRegister& src) {

	2103 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2104 int lane_count = LaneCountFromFormat(vform);

	2105

	2106 dst.ClearForWrite(vform);

	2107 for (int i = 0; i < lane_count; i++) {

	2108 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));

	2109 }

	2110 return dst;

	2111 }

	2112

	2113 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,

	2114 const LogicVRegister& src, int shift) {

	2115 SimVRegister temp;

	2116 VectorFormat vform_src = VectorFormatDoubleWidth(vform);

	2117 VectorFormat vform_dst = vform;

	2118 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);

	2119 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);

	2120 }

	2121

	2122 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,

	2123 const LogicVRegister& src, int shift) {

	2124 SimVRegister temp;

	2125 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2126 VectorFormat vformdst = vform;

	2127 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);

	2128 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2129 }

	2130

	2131 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,

	2132 const LogicVRegister& src, int shift) {

	2133 SimVRegister temp;

	2134 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2135 VectorFormat vformdst = vform;

	2136 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);

	2137 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2138 }

	2139

	2140 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,

	2141 const LogicVRegister& src, int shift) {

	2142 SimVRegister temp;

	2143 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2144 VectorFormat vformdst = vform;

	2145 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);

	2146 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2147 }

	2148

	2149 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,

	2150 const LogicVRegister& ind,

	2151 bool zero_out_of_bounds,

	2152 const LogicVRegister* tab1,

	2153 const LogicVRegister* tab2,

	2154 const LogicVRegister* tab3,

	2155 const LogicVRegister* tab4) {

	2156 DCHECK_NOT_NULL(tab1);

	2157 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};

	2158 uint64_t result[kMaxLanesPerVector];

	2159 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2160 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);

	2161 }

	2162 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2163 uint64_t j = ind.Uint(vform, i);

	2164 int tab_idx = static_cast<int>(j >> 4);

	2165 int j_idx = static_cast<int>(j & 15);

	2166 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {

	2167 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);

	2168 }

	2169 }

	2170 dst.SetUintArray(vform, result);

	2171 return dst;

	2172 }

	2173

	2174 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2175 const LogicVRegister& tab,

	2176 const LogicVRegister& ind) {

	2177 return Table(vform, dst, ind, true, &tab);

	2178 }

	2179

	2180 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2181 const LogicVRegister& tab,

	2182 const LogicVRegister& tab2,

	2183 const LogicVRegister& ind) {

	2184 return Table(vform, dst, ind, true, &tab, &tab2);

	2185 }

	2186

	2187 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2188 const LogicVRegister& tab,

	2189 const LogicVRegister& tab2,

	2190 const LogicVRegister& tab3,

	2191 const LogicVRegister& ind) {

	2192 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);

	2193 }

	2194

	2195 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2196 const LogicVRegister& tab,

	2197 const LogicVRegister& tab2,

	2198 const LogicVRegister& tab3,

	2199 const LogicVRegister& tab4,

	2200 const LogicVRegister& ind) {

	2201 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);

	2202 }

	2203

	2204 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2205 const LogicVRegister& tab,

	2206 const LogicVRegister& ind) {

	2207 return Table(vform, dst, ind, false, &tab);

	2208 }

	2209

	2210 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2211 const LogicVRegister& tab,

	2212 const LogicVRegister& tab2,

	2213 const LogicVRegister& ind) {

	2214 return Table(vform, dst, ind, false, &tab, &tab2);

	2215 }

	2216

	2217 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2218 const LogicVRegister& tab,

	2219 const LogicVRegister& tab2,

	2220 const LogicVRegister& tab3,

	2221 const LogicVRegister& ind) {

	2222 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);

	2223 }

	2224

	2225 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2226 const LogicVRegister& tab,

	2227 const LogicVRegister& tab2,

	2228 const LogicVRegister& tab3,

	2229 const LogicVRegister& tab4,

	2230 const LogicVRegister& ind) {

	2231 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);

	2232 }

	2233

	2234 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,

	2235 const LogicVRegister& src, int shift) {

	2236 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);

	2237 }

	2238

	2239 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,

	2240 const LogicVRegister& src, int shift) {

	2241 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);

	2242 }

	2243

	2244 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,

	2245 const LogicVRegister& src, int shift) {

	2246 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);

	2247 }

	2248

	2249 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,

	2250 const LogicVRegister& src, int shift) {

	2251 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);

	2252 }

	2253

	2254 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,

	2255 const LogicVRegister& src, int shift) {

	2256 SimVRegister temp;

	2257 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2258 VectorFormat vformdst = vform;

	2259 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2260 return sqxtn(vformdst, dst, shifted_src);

	2261 }

	2262

	2263 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,

	2264 const LogicVRegister& src, int shift) {

	2265 SimVRegister temp;

	2266 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2267 VectorFormat vformdst = vform;

	2268 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2269 return sqxtn(vformdst, dst, shifted_src);

	2270 }

	2271

	2272 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,

	2273 const LogicVRegister& src, int shift) {

	2274 SimVRegister temp;

	2275 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2276 VectorFormat vformdst = vform;

	2277 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2278 return sqxtn(vformdst, dst, shifted_src);

	2279 }

	2280

	2281 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,

	2282 const LogicVRegister& src, int shift) {

	2283 SimVRegister temp;

	2284 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2285 VectorFormat vformdst = vform;

	2286 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2287 return sqxtn(vformdst, dst, shifted_src);

	2288 }

	2289

	2290 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,

	2291 const LogicVRegister& src, int shift) {

	2292 SimVRegister temp;

	2293 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2294 VectorFormat vformdst = vform;

	2295 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2296 return sqxtun(vformdst, dst, shifted_src);

	2297 }

	2298

	2299 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,

	2300 const LogicVRegister& src, int shift) {

	2301 SimVRegister temp;

	2302 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2303 VectorFormat vformdst = vform;

	2304 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2305 return sqxtun(vformdst, dst, shifted_src);

	2306 }

	2307

	2308 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,

	2309 const LogicVRegister& src, int shift) {

	2310 SimVRegister temp;

	2311 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2312 VectorFormat vformdst = vform;

	2313 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2314 return sqxtun(vformdst, dst, shifted_src);

	2315 }

	2316

	2317 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,

	2318 const LogicVRegister& src, int shift) {

	2319 SimVRegister temp;

	2320 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2321 VectorFormat vformdst = vform;

	2322 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2323 return sqxtun(vformdst, dst, shifted_src);

	2324 }

	2325

	2326 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,

	2327 const LogicVRegister& src1,

	2328 const LogicVRegister& src2) {

	2329 SimVRegister temp1, temp2;

	2330 uxtl(vform, temp1, src1);

	2331 uxtl(vform, temp2, src2);

	2332 add(vform, dst, temp1, temp2);

	2333 return dst;

	2334 }

	2335

	2336 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,

	2337 const LogicVRegister& src1,

	2338 const LogicVRegister& src2) {

	2339 SimVRegister temp1, temp2;

	2340 uxtl2(vform, temp1, src1);

	2341 uxtl2(vform, temp2, src2);

	2342 add(vform, dst, temp1, temp2);

	2343 return dst;

	2344 }

	2345

	2346 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,

	2347 const LogicVRegister& src1,

	2348 const LogicVRegister& src2) {

	2349 SimVRegister temp;

	2350 uxtl(vform, temp, src2);

	2351 add(vform, dst, src1, temp);

	2352 return dst;

	2353 }

	2354

	2355 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,

	2356 const LogicVRegister& src1,

	2357 const LogicVRegister& src2) {

	2358 SimVRegister temp;

	2359 uxtl2(vform, temp, src2);

	2360 add(vform, dst, src1, temp);

	2361 return dst;

	2362 }

	2363

	2364 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,

	2365 const LogicVRegister& src1,

	2366 const LogicVRegister& src2) {

	2367 SimVRegister temp1, temp2;

	2368 sxtl(vform, temp1, src1);

	2369 sxtl(vform, temp2, src2);

	2370 add(vform, dst, temp1, temp2);

	2371 return dst;

	2372 }

	2373

	2374 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,

	2375 const LogicVRegister& src1,

	2376 const LogicVRegister& src2) {

	2377 SimVRegister temp1, temp2;

	2378 sxtl2(vform, temp1, src1);

	2379 sxtl2(vform, temp2, src2);

	2380 add(vform, dst, temp1, temp2);

	2381 return dst;

	2382 }

	2383

	2384 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,

	2385 const LogicVRegister& src1,

	2386 const LogicVRegister& src2) {

	2387 SimVRegister temp;

	2388 sxtl(vform, temp, src2);

	2389 add(vform, dst, src1, temp);

	2390 return dst;

	2391 }

	2392

	2393 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,

	2394 const LogicVRegister& src1,

	2395 const LogicVRegister& src2) {

	2396 SimVRegister temp;

	2397 sxtl2(vform, temp, src2);

	2398 add(vform, dst, src1, temp);

	2399 return dst;

	2400 }

	2401

	2402 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,

	2403 const LogicVRegister& src1,

	2404 const LogicVRegister& src2) {

	2405 SimVRegister temp1, temp2;

	2406 uxtl(vform, temp1, src1);

	2407 uxtl(vform, temp2, src2);

	2408 sub(vform, dst, temp1, temp2);

	2409 return dst;

	2410 }

	2411

	2412 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,

	2413 const LogicVRegister& src1,

	2414 const LogicVRegister& src2) {

	2415 SimVRegister temp1, temp2;

	2416 uxtl2(vform, temp1, src1);

	2417 uxtl2(vform, temp2, src2);

	2418 sub(vform, dst, temp1, temp2);

	2419 return dst;

	2420 }

	2421

	2422 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,

	2423 const LogicVRegister& src1,

	2424 const LogicVRegister& src2) {

	2425 SimVRegister temp;

	2426 uxtl(vform, temp, src2);

	2427 sub(vform, dst, src1, temp);

	2428 return dst;

	2429 }

	2430

	2431 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,

	2432 const LogicVRegister& src1,

	2433 const LogicVRegister& src2) {

	2434 SimVRegister temp;

	2435 uxtl2(vform, temp, src2);

	2436 sub(vform, dst, src1, temp);

	2437 return dst;

	2438 }

	2439

	2440 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,

	2441 const LogicVRegister& src1,

	2442 const LogicVRegister& src2) {

	2443 SimVRegister temp1, temp2;

	2444 sxtl(vform, temp1, src1);

	2445 sxtl(vform, temp2, src2);

	2446 sub(vform, dst, temp1, temp2);

	2447 return dst;

	2448 }

	2449

	2450 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,

	2451 const LogicVRegister& src1,

	2452 const LogicVRegister& src2) {

	2453 SimVRegister temp1, temp2;

	2454 sxtl2(vform, temp1, src1);

	2455 sxtl2(vform, temp2, src2);

	2456 sub(vform, dst, temp1, temp2);

	2457 return dst;

	2458 }

	2459

	2460 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,

	2461 const LogicVRegister& src1,

	2462 const LogicVRegister& src2) {

	2463 SimVRegister temp;

	2464 sxtl(vform, temp, src2);

	2465 sub(vform, dst, src1, temp);

	2466 return dst;

	2467 }

	2468

	2469 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,

	2470 const LogicVRegister& src1,

	2471 const LogicVRegister& src2) {

	2472 SimVRegister temp;

	2473 sxtl2(vform, temp, src2);

	2474 sub(vform, dst, src1, temp);

	2475 return dst;

	2476 }

	2477

	2478 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,

	2479 const LogicVRegister& src1,

	2480 const LogicVRegister& src2) {

	2481 SimVRegister temp1, temp2;

	2482 uxtl(vform, temp1, src1);

	2483 uxtl(vform, temp2, src2);

	2484 uaba(vform, dst, temp1, temp2);

	2485 return dst;

	2486 }

	2487

	2488 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,

	2489 const LogicVRegister& src1,

	2490 const LogicVRegister& src2) {

	2491 SimVRegister temp1, temp2;

	2492 uxtl2(vform, temp1, src1);

	2493 uxtl2(vform, temp2, src2);

	2494 uaba(vform, dst, temp1, temp2);

	2495 return dst;

	2496 }

	2497

	2498 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,

	2499 const LogicVRegister& src1,

	2500 const LogicVRegister& src2) {

	2501 SimVRegister temp1, temp2;

	2502 sxtl(vform, temp1, src1);

	2503 sxtl(vform, temp2, src2);

	2504 saba(vform, dst, temp1, temp2);

	2505 return dst;

	2506 }

	2507

	2508 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,

	2509 const LogicVRegister& src1,

	2510 const LogicVRegister& src2) {

	2511 SimVRegister temp1, temp2;

	2512 sxtl2(vform, temp1, src1);

	2513 sxtl2(vform, temp2, src2);

	2514 saba(vform, dst, temp1, temp2);

	2515 return dst;

	2516 }

	2517

	2518 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,

	2519 const LogicVRegister& src1,

	2520 const LogicVRegister& src2) {

	2521 SimVRegister temp1, temp2;

	2522 uxtl(vform, temp1, src1);

	2523 uxtl(vform, temp2, src2);

	2524 AbsDiff(vform, dst, temp1, temp2, false);

	2525 return dst;

	2526 }

	2527

	2528 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,

	2529 const LogicVRegister& src1,

	2530 const LogicVRegister& src2) {

	2531 SimVRegister temp1, temp2;

	2532 uxtl2(vform, temp1, src1);

	2533 uxtl2(vform, temp2, src2);

	2534 AbsDiff(vform, dst, temp1, temp2, false);

	2535 return dst;

	2536 }

	2537

	2538 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,

	2539 const LogicVRegister& src1,

	2540 const LogicVRegister& src2) {

	2541 SimVRegister temp1, temp2;

	2542 sxtl(vform, temp1, src1);

	2543 sxtl(vform, temp2, src2);

	2544 AbsDiff(vform, dst, temp1, temp2, true);

	2545 return dst;

	2546 }

	2547

	2548 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,

	2549 const LogicVRegister& src1,

	2550 const LogicVRegister& src2) {

	2551 SimVRegister temp1, temp2;

	2552 sxtl2(vform, temp1, src1);

	2553 sxtl2(vform, temp2, src2);

	2554 AbsDiff(vform, dst, temp1, temp2, true);

	2555 return dst;

	2556 }

	2557

	2558 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,

	2559 const LogicVRegister& src1,

	2560 const LogicVRegister& src2) {

	2561 SimVRegister temp1, temp2;

	2562 uxtl(vform, temp1, src1);

	2563 uxtl(vform, temp2, src2);

	2564 mul(vform, dst, temp1, temp2);

	2565 return dst;

	2566 }

	2567

	2568 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,

	2569 const LogicVRegister& src1,

	2570 const LogicVRegister& src2) {

	2571 SimVRegister temp1, temp2;

	2572 uxtl2(vform, temp1, src1);

	2573 uxtl2(vform, temp2, src2);

	2574 mul(vform, dst, temp1, temp2);

	2575 return dst;

	2576 }

	2577

	2578 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,

	2579 const LogicVRegister& src1,

	2580 const LogicVRegister& src2) {

	2581 SimVRegister temp1, temp2;

	2582 sxtl(vform, temp1, src1);

	2583 sxtl(vform, temp2, src2);

	2584 mul(vform, dst, temp1, temp2);

	2585 return dst;

	2586 }

	2587

	2588 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,

	2589 const LogicVRegister& src1,

	2590 const LogicVRegister& src2) {

	2591 SimVRegister temp1, temp2;

	2592 sxtl2(vform, temp1, src1);

	2593 sxtl2(vform, temp2, src2);

	2594 mul(vform, dst, temp1, temp2);

	2595 return dst;

	2596 }

	2597

	2598 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,

	2599 const LogicVRegister& src1,

	2600 const LogicVRegister& src2) {

	2601 SimVRegister temp1, temp2;

	2602 uxtl(vform, temp1, src1);

	2603 uxtl(vform, temp2, src2);

	2604 mls(vform, dst, temp1, temp2);

	2605 return dst;

	2606 }

	2607

	2608 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,

	2609 const LogicVRegister& src1,

	2610 const LogicVRegister& src2) {

	2611 SimVRegister temp1, temp2;

	2612 uxtl2(vform, temp1, src1);

	2613 uxtl2(vform, temp2, src2);

	2614 mls(vform, dst, temp1, temp2);

	2615 return dst;

	2616 }

	2617

	2618 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,

	2619 const LogicVRegister& src1,

	2620 const LogicVRegister& src2) {

	2621 SimVRegister temp1, temp2;

	2622 sxtl(vform, temp1, src1);

	2623 sxtl(vform, temp2, src2);

	2624 mls(vform, dst, temp1, temp2);

	2625 return dst;

	2626 }

	2627

	2628 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,

	2629 const LogicVRegister& src1,

	2630 const LogicVRegister& src2) {

	2631 SimVRegister temp1, temp2;

	2632 sxtl2(vform, temp1, src1);

	2633 sxtl2(vform, temp2, src2);

	2634 mls(vform, dst, temp1, temp2);

	2635 return dst;

	2636 }

	2637

	2638 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,

	2639 const LogicVRegister& src1,

	2640 const LogicVRegister& src2) {

	2641 SimVRegister temp1, temp2;

	2642 uxtl(vform, temp1, src1);

	2643 uxtl(vform, temp2, src2);

	2644 mla(vform, dst, temp1, temp2);

	2645 return dst;

	2646 }

	2647

	2648 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,

	2649 const LogicVRegister& src1,

	2650 const LogicVRegister& src2) {

	2651 SimVRegister temp1, temp2;

	2652 uxtl2(vform, temp1, src1);

	2653 uxtl2(vform, temp2, src2);

	2654 mla(vform, dst, temp1, temp2);

	2655 return dst;

	2656 }

	2657

	2658 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,

	2659 const LogicVRegister& src1,

	2660 const LogicVRegister& src2) {

	2661 SimVRegister temp1, temp2;

	2662 sxtl(vform, temp1, src1);

	2663 sxtl(vform, temp2, src2);

	2664 mla(vform, dst, temp1, temp2);

	2665 return dst;

	2666 }

	2667

	2668 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,

	2669 const LogicVRegister& src1,

	2670 const LogicVRegister& src2) {

	2671 SimVRegister temp1, temp2;

	2672 sxtl2(vform, temp1, src1);

	2673 sxtl2(vform, temp2, src2);

	2674 mla(vform, dst, temp1, temp2);

	2675 return dst;

	2676 }

	2677

	2678 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,

	2679 const LogicVRegister& src1,

	2680 const LogicVRegister& src2) {

	2681 SimVRegister temp;

	2682 LogicVRegister product = sqdmull(vform, temp, src1, src2);

	2683 return add(vform, dst, dst, product).SignedSaturate(vform);

	2684 }

	2685

	2686 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,

	2687 const LogicVRegister& src1,

	2688 const LogicVRegister& src2) {

	2689 SimVRegister temp;

	2690 LogicVRegister product = sqdmull2(vform, temp, src1, src2);

	2691 return add(vform, dst, dst, product).SignedSaturate(vform);

	2692 }

	2693

	2694 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,

	2695 const LogicVRegister& src1,

	2696 const LogicVRegister& src2) {

	2697 SimVRegister temp;

	2698 LogicVRegister product = sqdmull(vform, temp, src1, src2);

	2699 return sub(vform, dst, dst, product).SignedSaturate(vform);

	2700 }

	2701

	2702 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,

	2703 const LogicVRegister& src1,

	2704 const LogicVRegister& src2) {

	2705 SimVRegister temp;

	2706 LogicVRegister product = sqdmull2(vform, temp, src1, src2);

	2707 return sub(vform, dst, dst, product).SignedSaturate(vform);

	2708 }

	2709

	2710 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,

	2711 const LogicVRegister& src1,

	2712 const LogicVRegister& src2) {

	2713 SimVRegister temp;

	2714 LogicVRegister product = smull(vform, temp, src1, src2);

	2715 return add(vform, dst, product, product).SignedSaturate(vform);

	2716 }

	2717

	2718 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,

	2719 const LogicVRegister& src1,

	2720 const LogicVRegister& src2) {

	2721 SimVRegister temp;

	2722 LogicVRegister product = smull2(vform, temp, src1, src2);

	2723 return add(vform, dst, product, product).SignedSaturate(vform);

	2724 }

	2725

	2726 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,

	2727 const LogicVRegister& src1,

	2728 const LogicVRegister& src2, bool round) {

	2729 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.

	2730 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)

	2731 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.

	2732

	2733 int esize = LaneSizeInBitsFromFormat(vform);

	2734 int round_const = round ? (1 << (esize - 2)) : 0;

	2735 int64_t product;

	2736

	2737 dst.ClearForWrite(vform);

	2738 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2739 product = src1.Int(vform, i) * src2.Int(vform, i);

	2740 product += round_const;

	2741 product = product >> (esize - 1);

	2742

	2743 if (product > MaxIntFromFormat(vform)) {

	2744 product = MaxIntFromFormat(vform);

	2745 } else if (product < MinIntFromFormat(vform)) {

	2746 product = MinIntFromFormat(vform);

	2747 }

	2748 dst.SetInt(vform, i, product);

	2749 }

	2750 return dst;

	2751 }

	2752

	2753 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,

	2754 const LogicVRegister& src1,

	2755 const LogicVRegister& src2) {

	2756 return sqrdmulh(vform, dst, src1, src2, false);

	2757 }

	2758

	2759 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,

	2760 const LogicVRegister& src1,

	2761 const LogicVRegister& src2) {

	2762 SimVRegister temp;

	2763 add(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2764 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2765 return dst;

	2766 }

	2767

	2768 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,

	2769 const LogicVRegister& src1,

	2770 const LogicVRegister& src2) {

	2771 SimVRegister temp;

	2772 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2773 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2774 return dst;

	2775 }

	2776

	2777 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,

	2778 const LogicVRegister& src1,

	2779 const LogicVRegister& src2) {

	2780 SimVRegister temp;

	2781 add(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2782 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2783 return dst;

	2784 }

	2785

	2786 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,

	2787 const LogicVRegister& src1,

	2788 const LogicVRegister& src2) {

	2789 SimVRegister temp;

	2790 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2791 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2792 return dst;

	2793 }

	2794

	2795 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,

	2796 const LogicVRegister& src1,

	2797 const LogicVRegister& src2) {

	2798 SimVRegister temp;

	2799 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2800 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2801 return dst;

	2802 }

	2803

	2804 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,

	2805 const LogicVRegister& src1,

	2806 const LogicVRegister& src2) {

	2807 SimVRegister temp;

	2808 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2809 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2810 return dst;

	2811 }

	2812

	2813 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,

	2814 const LogicVRegister& src1,

	2815 const LogicVRegister& src2) {

	2816 SimVRegister temp;

	2817 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2818 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2819 return dst;

	2820 }

	2821

	2822 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,

	2823 const LogicVRegister& src1,

	2824 const LogicVRegister& src2) {

	2825 SimVRegister temp;

	2826 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2827 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2828 return dst;

	2829 }

	2830

	2831 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,

	2832 const LogicVRegister& src1,

	2833 const LogicVRegister& src2) {

	2834 uint64_t result[16];

	2835 int laneCount = LaneCountFromFormat(vform);

	2836 int pairs = laneCount / 2;

	2837 for (int i = 0; i < pairs; ++i) {

	2838 result[2 * i] = src1.Uint(vform, 2 * i);

	2839 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);

	2840 }

	2841

	2842 dst.SetUintArray(vform, result);

	2843 return dst;

	2844 }

	2845

	2846 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,

	2847 const LogicVRegister& src1,

	2848 const LogicVRegister& src2) {

	2849 uint64_t result[16];

	2850 int laneCount = LaneCountFromFormat(vform);

	2851 int pairs = laneCount / 2;

	2852 for (int i = 0; i < pairs; ++i) {

	2853 result[2 * i] = src1.Uint(vform, (2 * i) + 1);

	2854 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);

	2855 }

	2856

	2857 dst.SetUintArray(vform, result);

	2858 return dst;

	2859 }

	2860

	2861 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,

	2862 const LogicVRegister& src1,

	2863 const LogicVRegister& src2) {

	2864 uint64_t result[16];

	2865 int laneCount = LaneCountFromFormat(vform);

	2866 int pairs = laneCount / 2;

	2867 for (int i = 0; i < pairs; ++i) {

	2868 result[2 * i] = src1.Uint(vform, i);

	2869 result[(2 * i) + 1] = src2.Uint(vform, i);

	2870 }

	2871

	2872 dst.SetUintArray(vform, result);

	2873 return dst;

	2874 }

	2875

	2876 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,

	2877 const LogicVRegister& src1,

	2878 const LogicVRegister& src2) {

	2879 uint64_t result[16];

	2880 int laneCount = LaneCountFromFormat(vform);

	2881 int pairs = laneCount / 2;

	2882 for (int i = 0; i < pairs; ++i) {

	2883 result[2 * i] = src1.Uint(vform, pairs + i);

	2884 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);

	2885 }

	2886

	2887 dst.SetUintArray(vform, result);

	2888 return dst;

	2889 }

	2890

	2891 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,

	2892 const LogicVRegister& src1,

	2893 const LogicVRegister& src2) {

	2894 uint64_t result[32];

	2895 int laneCount = LaneCountFromFormat(vform);

	2896 for (int i = 0; i < laneCount; ++i) {

	2897 result[i] = src1.Uint(vform, i);

	2898 result[laneCount + i] = src2.Uint(vform, i);

	2899 }

	2900

	2901 dst.ClearForWrite(vform);

	2902 for (int i = 0; i < laneCount; ++i) {

	2903 dst.SetUint(vform, i, result[2 * i]);

	2904 }

	2905 return dst;

	2906 }

	2907

	2908 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,

	2909 const LogicVRegister& src1,

	2910 const LogicVRegister& src2) {

	2911 uint64_t result[32];

	2912 int laneCount = LaneCountFromFormat(vform);

	2913 for (int i = 0; i < laneCount; ++i) {

	2914 result[i] = src1.Uint(vform, i);

	2915 result[laneCount + i] = src2.Uint(vform, i);

	2916 }

	2917

	2918 dst.ClearForWrite(vform);

	2919 for (int i = 0; i < laneCount; ++i) {

	2920 dst.SetUint(vform, i, result[(2 * i) + 1]);

	2921 }

	2922 return dst;

	2923 }

	2924

	2925 template <typename T>

	2926 T Simulator::FPAdd(T op1, T op2) {

	2927 T result = FPProcessNaNs(op1, op2);

	2928 if (std::isnan(result)) return result;

	2929

	2930 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {

	2931 // inf + -inf returns the default NaN.

	2932 FPProcessException();

	2933 return FPDefaultNaN<T>();

	2934 } else {

	2935 // Other cases should be handled by standard arithmetic.

	2936 return op1 + op2;

	2937 }

	2938 }

	2939

	2940 template <typename T>

	2941 T Simulator::FPSub(T op1, T op2) {

	2942 // NaNs should be handled elsewhere.

	2943 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	2944

	2945 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {

	2946 // inf - inf returns the default NaN.

	2947 FPProcessException();

	2948 return FPDefaultNaN<T>();

	2949 } else {

	2950 // Other cases should be handled by standard arithmetic.

	2951 return op1 - op2;

	2952 }

	2953 }

	2954

	2955 template <typename T>

	2956 T Simulator::FPMul(T op1, T op2) {

	2957 // NaNs should be handled elsewhere.

	2958 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	2959

	2960 if ((std::isinf(op1) && (op2 == 0.0)) \|\| (std::isinf(op2) && (op1 == 0.0))) {

	2961 // inf * 0.0 returns the default NaN.

	2962 FPProcessException();

	2963 return FPDefaultNaN<T>();

	2964 } else {

	2965 // Other cases should be handled by standard arithmetic.

	2966 return op1 * op2;

	2967 }

	2968 }

	2969

	2970 template <typename T>

	2971 T Simulator::FPMulx(T op1, T op2) {

	2972 if ((std::isinf(op1) && (op2 == 0.0)) \|\| (std::isinf(op2) && (op1 == 0.0))) {

	2973 // inf * 0.0 returns +/-2.0.

	2974 T two = 2.0;

	2975 return copysign(1.0, op1) * copysign(1.0, op2) * two;

	2976 }

	2977 return FPMul(op1, op2);

	2978 }

	2979

	2980 template <typename T>

	2981 T Simulator::FPMulAdd(T a, T op1, T op2) {

	2982 T result = FPProcessNaNs3(a, op1, op2);

	2983

	2984 T sign_a = copysign(1.0, a);

	2985 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);

	2986 bool isinf_prod = std::isinf(op1) \|\| std::isinf(op2);

	2987 bool operation_generates_nan =

	2988 (std::isinf(op1) && (op2 == 0.0)) \|\| // inf * 0.0

	2989 (std::isinf(op2) && (op1 == 0.0)) \|\| // 0.0 * inf

	2990 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf

	2991

	2992 if (std::isnan(result)) {

	2993 // Generated NaNs override quiet NaNs propagated from a.

	2994 if (operation_generates_nan && IsQuietNaN(a)) {

	2995 FPProcessException();

	2996 return FPDefaultNaN<T>();

	2997 } else {

	2998 return result;

	2999 }

	3000 }

	3001

	3002 // If the operation would produce a NaN, return the default NaN.

	3003 if (operation_generates_nan) {

	3004 FPProcessException();

	3005 return FPDefaultNaN<T>();

	3006 }

	3007

	3008 // Work around broken fma implementations for exact zero results: The sign of

	3009 // exact 0.0 results is positive unless both a and op1 * op2 are negative.

	3010 if (((op1 == 0.0) \|\| (op2 == 0.0)) && (a == 0.0)) {

	3011 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;

	3012 }

	3013

	3014 result = FusedMultiplyAdd(op1, op2, a);

	3015 DCHECK(!std::isnan(result));

	3016

	3017 // Work around broken fma implementations for rounded zero results: If a is

	3018 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.

	3019 if ((a == 0.0) && (result == 0.0)) {

	3020 return copysign(0.0, sign_prod);

	3021 }

	3022

	3023 return result;

	3024 }

	3025

	3026 template <typename T>

	3027 T Simulator::FPDiv(T op1, T op2) {

	3028 // NaNs should be handled elsewhere.

	3029 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	3030

	3031 if ((std::isinf(op1) && std::isinf(op2)) \|\| ((op1 == 0.0) && (op2 == 0.0))) {

	3032 // inf / inf and 0.0 / 0.0 return the default NaN.

	3033 FPProcessException();

	3034 return FPDefaultNaN<T>();

	3035 } else {

	3036 if (op2 == 0.0) {

	3037 FPProcessException();

	3038 if (!std::isnan(op1)) {

	3039 double op1_sign = copysign(1.0, op1);

	3040 double op2_sign = copysign(1.0, op2);

	3041 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);

	3042 }

	3043 }

	3044

	3045 // Other cases should be handled by standard arithmetic.

	3046 return op1 / op2;

	3047 }

	3048 }

	3049

	3050 template <typename T>

	3051 T Simulator::FPSqrt(T op) {

	3052 if (std::isnan(op)) {

	3053 return FPProcessNaN(op);

	3054 } else if (op < 0.0) {

	3055 FPProcessException();

	3056 return FPDefaultNaN<T>();

	3057 } else {

	3058 return sqrt(op);

	3059 }

	3060 }

	3061

	3062 template <typename T>

	3063 T Simulator::FPMax(T a, T b) {

	3064 T result = FPProcessNaNs(a, b);

	3065 if (std::isnan(result)) return result;

	3066

	3067 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {

	3068 // a and b are zero, and the sign differs: return +0.0.

	3069 return 0.0;

	3070 } else {

	3071 return (a > b) ? a : b;

	3072 }

	3073 }

	3074

	3075 template <typename T>

	3076 T Simulator::FPMaxNM(T a, T b) {

	3077 if (IsQuietNaN(a) && !IsQuietNaN(b)) {

	3078 a = kFP64NegativeInfinity;

	3079 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {

	3080 b = kFP64NegativeInfinity;

	3081 }

	3082

	3083 T result = FPProcessNaNs(a, b);

	3084 return std::isnan(result) ? result : FPMax(a, b);

	3085 }

	3086

	3087 template <typename T>

	3088 T Simulator::FPMin(T a, T b) {

	3089 T result = FPProcessNaNs(a, b);

	3090 if (std::isnan(result)) return result;

	3091

	3092 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {

	3093 // a and b are zero, and the sign differs: return -0.0.

	3094 return -0.0;

	3095 } else {

	3096 return (a < b) ? a : b;

	3097 }

	3098 }

	3099

	3100 template <typename T>

	3101 T Simulator::FPMinNM(T a, T b) {

	3102 if (IsQuietNaN(a) && !IsQuietNaN(b)) {

	3103 a = kFP64PositiveInfinity;

	3104 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {

	3105 b = kFP64PositiveInfinity;

	3106 }

	3107

	3108 T result = FPProcessNaNs(a, b);

	3109 return std::isnan(result) ? result : FPMin(a, b);

	3110 }

	3111

	3112 template <typename T>

	3113 T Simulator::FPRecipStepFused(T op1, T op2) {

	3114 const T two = 2.0;

	3115 if ((std::isinf(op1) && (op2 == 0.0)) \|\|

	3116 ((op1 == 0.0) && (std::isinf(op2)))) {

	3117 return two;

	3118 } else if (std::isinf(op1) \|\| std::isinf(op2)) {

	3119 // Return +inf if signs match, otherwise -inf.

	3120 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity

	3121 : kFP64NegativeInfinity;

	3122 } else {

	3123 return FusedMultiplyAdd(op1, op2, two);

	3124 }

	3125 }

	3126

	3127 template <typename T>

	3128 T Simulator::FPRSqrtStepFused(T op1, T op2) {

	3129 const T one_point_five = 1.5;

	3130 const T two = 2.0;

	3131

	3132 if ((std::isinf(op1) && (op2 == 0.0)) \|\|

	3133 ((op1 == 0.0) && (std::isinf(op2)))) {

	3134 return one_point_five;

	3135 } else if (std::isinf(op1) \|\| std::isinf(op2)) {

	3136 // Return +inf if signs match, otherwise -inf.

	3137 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity

	3138 : kFP64NegativeInfinity;

	3139 } else {

	3140 // The multiply-add-halve operation must be fully fused, so avoid interim

	3141 // rounding by checking which operand can be losslessly divided by two

	3142 // before doing the multiply-add.

	3143 if (std::isnormal(op1 / two)) {

	3144 return FusedMultiplyAdd(op1 / two, op2, one_point_five);

	3145 } else if (std::isnormal(op2 / two)) {

	3146 return FusedMultiplyAdd(op1, op2 / two, one_point_five);

	3147 } else {

	3148 // Neither operand is normal after halving: the result is dominated by

	3149 // the addition term, so just return that.

	3150 return one_point_five;

	3151 }

	3152 }

	3153 }

	3154

	3155 double Simulator::FPRoundInt(double value, FPRounding round_mode) {

	3156 if ((value == 0.0) \|\| (value == kFP64PositiveInfinity) \|\|

	3157 (value == kFP64NegativeInfinity)) {

	3158 return value;

	3159 } else if (std::isnan(value)) {

	3160 return FPProcessNaN(value);

	3161 }

	3162

	3163 double int_result = std::floor(value);

	3164 double error = value - int_result;

	3165 switch (round_mode) {

	3166 case FPTieAway: {

	3167 // Take care of correctly handling the range ]-0.5, -0.0], which must

	3168 // yield -0.0.

	3169 if ((-0.5 < value) && (value < 0.0)) {

	3170 int_result = -0.0;

	3171

	3172 } else if ((error > 0.5) \|\| ((error == 0.5) && (int_result >= 0.0))) {

	3173 // If the error is greater than 0.5, or is equal to 0.5 and the integer

	3174 // result is positive, round up.

	3175 int_result++;

	3176 }

	3177 break;

	3178 }

	3179 case FPTieEven: {

	3180 // Take care of correctly handling the range [-0.5, -0.0], which must

	3181 // yield -0.0.

	3182 if ((-0.5 <= value) && (value < 0.0)) {

	3183 int_result = -0.0;

	3184

	3185 // If the error is greater than 0.5, or is equal to 0.5 and the integer

	3186 // result is odd, round up.

	3187 } else if ((error > 0.5) \|\|

	3188 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {

	3189 int_result++;

	3190 }

	3191 break;

	3192 }

	3193 case FPZero: {

	3194 // If value>0 then we take floor(value)

	3195 // otherwise, ceil(value).

	3196 if (value < 0) {

	3197 int_result = ceil(value);

	3198 }

	3199 break;

	3200 }

	3201 case FPNegativeInfinity: {

	3202 // We always use floor(value).

	3203 break;

	3204 }

	3205 case FPPositiveInfinity: {

	3206 // Take care of correctly handling the range ]-1.0, -0.0], which must

	3207 // yield -0.0.

	3208 if ((-1.0 < value) && (value < 0.0)) {

	3209 int_result = -0.0;

	3210

	3211 // If the error is non-zero, round up.

	3212 } else if (error > 0.0) {

	3213 int_result++;

	3214 }

	3215 break;

	3216 }

	3217 default:

	3218 UNIMPLEMENTED();

	3219 }

	3220 return int_result;

	3221 }

	3222

	3223 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {

	3224 value = FPRoundInt(value, rmode);

	3225 if (value >= kWMaxInt) {

	3226 return kWMaxInt;

	3227 } else if (value < kWMinInt) {

	3228 return kWMinInt;

	3229 }

	3230 return std::isnan(value) ? 0 : static_cast<int32_t>(value);

	3231 }

	3232

	3233 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {

	3234 value = FPRoundInt(value, rmode);

	3235 if (value >= kXMaxInt) {

	3236 return kXMaxInt;

	3237 } else if (value < kXMinInt) {

	3238 return kXMinInt;

	3239 }

	3240 return std::isnan(value) ? 0 : static_cast<int64_t>(value);

	3241 }

	3242

	3243 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {

	3244 value = FPRoundInt(value, rmode);

	3245 if (value >= kWMaxUInt) {

	3246 return kWMaxUInt;

	3247 } else if (value < 0.0) {

	3248 return 0;

	3249 }

	3250 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);

	3251 }

	3252

	3253 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {

	3254 value = FPRoundInt(value, rmode);

	3255 if (value >= kXMaxUInt) {

	3256 return kXMaxUInt;

	3257 } else if (value < 0.0) {

	3258 return 0;

	3259 }

	3260 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);

	3261 }

	3262

	// Expanded once per entry of NEON_FP3SAME_LIST. For each (FN, OP, PROCNAN)
	// it defines two Simulator members named FN:
	//  - a template over the lane type T that clears dst for writing and then,
	//    lane by lane, stores OP(op1, op2). When PROCNAN is true,
	//    FPProcessNaNs(op1, op2) is evaluated first and OP is only applied if
	//    that result is not a NaN; otherwise the FPProcessNaNs result is stored.
	//  - a non-template overload that picks FN<float> when the lane size equals
	//    kSRegSize and FN<double> otherwise (the lane size is DCHECKed to equal
	//    kDRegSize in that case).
	// Both return dst.
	3263 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \

	3264 template <typename T> \

	3265 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \

	3266 const LogicVRegister& src1, \

	3267 const LogicVRegister& src2) { \

	3268 dst.ClearForWrite(vform); \

	3269 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \

	3270 T op1 = src1.Float<T>(i); \

	3271 T op2 = src2.Float<T>(i); \

	3272 T result; \

	3273 if (PROCNAN) { \

	3274 result = FPProcessNaNs(op1, op2); \

	3275 if (!std::isnan(result)) { \

	3276 result = OP(op1, op2); \

	3277 } \

	3278 } else { \

	3279 result = OP(op1, op2); \

	3280 } \

	3281 dst.SetFloat(i, result); \

	3282 } \

	3283 return dst; \

	3284 } \

	3285 \

	3286 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \

	3287 const LogicVRegister& src1, \

	3288 const LogicVRegister& src2) { \

	3289 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \

	3290 FN<float>(vform, dst, src1, src2); \

	3291 } else { \

	3292 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \

	3293 FN<double>(vform, dst, src1, src2); \

	3294 } \

	3295 return dst; \

	3296 }

	3297 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)

	3298 #undef DEFINE_NEON_FP_VECTOR_OP

	3299

	3300 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,

	3301 const LogicVRegister& src1,

	3302 const LogicVRegister& src2) {

	3303 SimVRegister temp;

	3304 LogicVRegister product = fmul(vform, temp, src1, src2);

	3305 return fneg(vform, dst, product);

	3306 }

	3307

	3308 template <typename T>

	3309 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,

	3310 const LogicVRegister& src1,

	3311 const LogicVRegister& src2) {

	3312 dst.ClearForWrite(vform);

	3313 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3314 T op1 = -src1.Float<T>(i);

	3315 T op2 = src2.Float<T>(i);

	3316 T result = FPProcessNaNs(op1, op2);

	3317 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));

	3318 }

	3319 return dst;

	3320 }

	3321

	3322 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,

	3323 const LogicVRegister& src1,

	3324 const LogicVRegister& src2) {

	3325 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3326 frecps<float>(vform, dst, src1, src2);

	3327 } else {

	3328 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3329 frecps<double>(vform, dst, src1, src2);

	3330 }

	3331 return dst;

	3332 }

	3333

	3334 template <typename T>

	3335 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,

	3336 const LogicVRegister& src1,

	3337 const LogicVRegister& src2) {

	3338 dst.ClearForWrite(vform);

	3339 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3340 T op1 = -src1.Float<T>(i);

	3341 T op2 = src2.Float<T>(i);

	3342 T result = FPProcessNaNs(op1, op2);

	3343 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));

	3344 }

	3345 return dst;

	3346 }

	3347

	3348 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,

	3349 const LogicVRegister& src1,

	3350 const LogicVRegister& src2) {

	3351 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3352 frsqrts<float>(vform, dst, src1, src2);

	3353 } else {

	3354 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3355 frsqrts<double>(vform, dst, src1, src2);

	3356 }

	3357 return dst;

	3358 }

	3359

	3360 template <typename T>

	3361 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,

	3362 const LogicVRegister& src1,

	3363 const LogicVRegister& src2, Condition cond) {

	3364 dst.ClearForWrite(vform);

	3365 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3366 bool result = false;

	3367 T op1 = src1.Float<T>(i);

	3368 T op2 = src2.Float<T>(i);

	3369 T nan_result = FPProcessNaNs(op1, op2);

	3370 if (!std::isnan(nan_result)) {

	3371 switch (cond) {

	3372 case eq:

	3373 result = (op1 == op2);

	3374 break;

	3375 case ge:

	3376 result = (op1 >= op2);

	3377 break;

	3378 case gt:

	3379 result = (op1 > op2);

	3380 break;

	3381 case le:

	3382 result = (op1 <= op2);

	3383 break;

	3384 case lt:

	3385 result = (op1 < op2);

	3386 break;

	3387 default:

	3388 UNREACHABLE();

	3389 break;

	3390 }

	3391 }

	3392 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);

	3393 }

	3394 return dst;

	3395 }

	3396

	3397 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,

	3398 const LogicVRegister& src1,

	3399 const LogicVRegister& src2, Condition cond) {

	3400 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3401 fcmp<float>(vform, dst, src1, src2, cond);

	3402 } else {

	3403 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3404 fcmp<double>(vform, dst, src1, src2, cond);

	3405 }

	3406 return dst;

	3407 }

	3408

	3409 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,

	3410 const LogicVRegister& src, Condition cond) {

	3411 SimVRegister temp;

	3412 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3413 LogicVRegister zero_reg =

	3414 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));

	3415 fcmp<float>(vform, dst, src, zero_reg, cond);

	3416 } else {

	3417 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3418 LogicVRegister zero_reg =

	3419 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));

	3420 fcmp<double>(vform, dst, src, zero_reg, cond);

	3421 }

	3422 return dst;

	3423 }

	3424

	3425 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,

	3426 const LogicVRegister& src1,

	3427 const LogicVRegister& src2, Condition cond) {

	3428 SimVRegister temp1, temp2;

	3429 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3430 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);

	3431 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);

	3432 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);

	3433 } else {

	3434 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3435 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);

	3436 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);

	3437 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);

	3438 }

	3439 return dst;

	3440 }

	3441

	3442 template <typename T>

	3443 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3444 const LogicVRegister& src1,

	3445 const LogicVRegister& src2) {

	3446 dst.ClearForWrite(vform);

	3447 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3448 T op1 = src1.Float<T>(i);

	3449 T op2 = src2.Float<T>(i);

	3450 T acc = dst.Float<T>(i);

	3451 T result = FPMulAdd(acc, op1, op2);

	3452 dst.SetFloat(i, result);

	3453 }

	3454 return dst;

	3455 }

	3456

	3457 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3458 const LogicVRegister& src1,

	3459 const LogicVRegister& src2) {

	3460 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3461 fmla<float>(vform, dst, src1, src2);

	3462 } else {

	3463 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3464 fmla<double>(vform, dst, src1, src2);

	3465 }

	3466 return dst;

	3467 }

	3468

	3469 template <typename T>

	3470 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3471 const LogicVRegister& src1,

	3472 const LogicVRegister& src2) {

	3473 dst.ClearForWrite(vform);

	3474 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3475 T op1 = -src1.Float<T>(i);

	3476 T op2 = src2.Float<T>(i);

	3477 T acc = dst.Float<T>(i);

	3478 T result = FPMulAdd(acc, op1, op2);

	3479 dst.SetFloat(i, result);

	3480 }

	3481 return dst;

	3482 }

	3483

	3484 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3485 const LogicVRegister& src1,

	3486 const LogicVRegister& src2) {

	3487 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3488 fmls<float>(vform, dst, src1, src2);

	3489 } else {

	3490 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3491 fmls<double>(vform, dst, src1, src2);

	3492 }

	3493 return dst;

	3494 }

	3495

	3496 template <typename T>

	3497 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,

	3498 const LogicVRegister& src) {

	3499 dst.ClearForWrite(vform);

	3500 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3501 T op = src.Float<T>(i);

	3502 op = -op;

	3503 dst.SetFloat(i, op);

	3504 }

	3505 return dst;

	3506 }

	3507

	3508 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,

	3509 const LogicVRegister& src) {

	3510 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3511 fneg<float>(vform, dst, src);

	3512 } else {

	3513 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3514 fneg<double>(vform, dst, src);

	3515 }

	3516 return dst;

	3517 }

	3518

	3519 template <typename T>

	3520 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,

	3521 const LogicVRegister& src) {

	3522 dst.ClearForWrite(vform);

	3523 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3524 T op = src.Float<T>(i);

	3525 if (copysign(1.0, op) < 0.0) {

	3526 op = -op;

	3527 }

	3528 dst.SetFloat(i, op);

	3529 }

	3530 return dst;

	3531 }

	3532

	3533 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,

	3534 const LogicVRegister& src) {

	3535 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3536 fabs_<float>(vform, dst, src);

	3537 } else {

	3538 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3539 fabs_<double>(vform, dst, src);

	3540 }

	3541 return dst;

	3542 }

	3543

	3544 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,

	3545 const LogicVRegister& src1,

	3546 const LogicVRegister& src2) {

	3547 SimVRegister temp;

	3548 fsub(vform, temp, src1, src2);

	3549 fabs_(vform, dst, temp);

	3550 return dst;

	3551 }

	3552

	3553 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,

	3554 const LogicVRegister& src) {

	3555 dst.ClearForWrite(vform);

	3556 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3557 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3558 float result = FPSqrt(src.Float<float>(i));

	3559 dst.SetFloat(i, result);

	3560 }

	3561 } else {

	3562 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3563 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3564 double result = FPSqrt(src.Float<double>(i));

	3565 dst.SetFloat(i, result);

	3566 }

	3567 }

	3568 return dst;

	3569 }

	3570

	// Expanded once per entry of NEON_FPPAIRWISE_LIST. For each (FNP, FN, OP)
	// it defines two Simulator::FNP overloads:
	//  - a two-source form that splits the sources with uzp1 / uzp2 into two
	//    scratch registers and combines those with FN into dst;
	//  - a one-source form (vform must be kFormatS or kFormatD) that applies OP
	//    to lanes 0 and 1 of src and stores the result in lane 0 of dst.
	// NOTE(review): in the one-source form dst.ClearForWrite(vform) runs only
	// after both src lanes have been read and lane 0 has been written. This
	// ordering presumably keeps src lane 1 intact when dst and src refer to the
	// same register - confirm before reordering these statements.
	3571 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \

	3572 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \

	3573 const LogicVRegister& src1, \

	3574 const LogicVRegister& src2) { \

	3575 SimVRegister temp1, temp2; \

	3576 uzp1(vform, temp1, src1, src2); \

	3577 uzp2(vform, temp2, src1, src2); \

	3578 FN(vform, dst, temp1, temp2); \

	3579 return dst; \

	3580 } \

	3581 \

	3582 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \

	3583 const LogicVRegister& src) { \

	3584 if (vform == kFormatS) { \

	3585 float result = OP(src.Float<float>(0), src.Float<float>(1)); \

	3586 dst.SetFloat(0, result); \

	3587 } else { \

	3588 DCHECK_EQ(vform, kFormatD); \

	3589 double result = OP(src.Float<double>(0), src.Float<double>(1)); \

	3590 dst.SetFloat(0, result); \

	3591 } \

	3592 dst.ClearForWrite(vform); \

	3593 return dst; \

	3594 }

	3595 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)

	3596 #undef DEFINE_NEON_FP_PAIR_OP

	3597

	3598 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,

	3599 const LogicVRegister& src, FPMinMaxOp Op) {

	3600 DCHECK_EQ(vform, kFormat4S);

	3601 USE(vform);

	3602 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));

	3603 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));

	3604 float result = (this->*Op)(result1, result2);

	3605 dst.ClearForWrite(kFormatS);

	3606 dst.SetFloat<float>(0, result);

	3607 return dst;

	3608 }

	3609

	3610 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,

	3611 const LogicVRegister& src) {

	3612 return FMinMaxV(vform, dst, src, &Simulator::FPMax);

	3613 }

	3614

	3615 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,

	3616 const LogicVRegister& src) {

	3617 return FMinMaxV(vform, dst, src, &Simulator::FPMin);

	3618 }

	3619

	3620 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,

	3621 const LogicVRegister& src) {

	3622 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);

	3623 }

	3624

	3625 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,

	3626 const LogicVRegister& src) {

	3627 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);

	3628 }

	3629

	3630 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,

	3631 const LogicVRegister& src1,

	3632 const LogicVRegister& src2, int index) {

	3633 dst.ClearForWrite(vform);

	3634 SimVRegister temp;

	3635 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3636 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3637 fmul<float>(vform, dst, src1, index_reg);

	3638 } else {

	3639 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3640 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3641 fmul<double>(vform, dst, src1, index_reg);

	3642 }

	3643 return dst;

	3644 }

	3645

	3646 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3647 const LogicVRegister& src1,

	3648 const LogicVRegister& src2, int index) {

	3649 dst.ClearForWrite(vform);

	3650 SimVRegister temp;

	3651 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3652 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3653 fmla<float>(vform, dst, src1, index_reg);

	3654 } else {

	3655 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3656 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3657 fmla<double>(vform, dst, src1, index_reg);

	3658 }

	3659 return dst;

	3660 }

	3661

	3662 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3663 const LogicVRegister& src1,

	3664 const LogicVRegister& src2, int index) {

	3665 dst.ClearForWrite(vform);

	3666 SimVRegister temp;

	3667 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3668 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3669 fmls<float>(vform, dst, src1, index_reg);

	3670 } else {

	3671 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3672 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3673 fmls<double>(vform, dst, src1, index_reg);

	3674 }

	3675 return dst;

	3676 }

	3677

	3678 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,

	3679 const LogicVRegister& src1,

	3680 const LogicVRegister& src2, int index) {

	3681 dst.ClearForWrite(vform);

	3682 SimVRegister temp;

	3683 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3684 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3685 fmulx<float>(vform, dst, src1, index_reg);

	3686

	3687 } else {

	3688 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3689 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3690 fmulx<double>(vform, dst, src1, index_reg);

	3691 }

	3692 return dst;

	3693 }

	3694

	3695 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,

	3696 const LogicVRegister& src,

	3697 FPRounding rounding_mode,

	3698 bool inexact_exception) {

	3699 dst.ClearForWrite(vform);

	3700 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3701 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3702 float input = src.Float<float>(i);

	3703 float rounded = FPRoundInt(input, rounding_mode);

	3704 if (inexact_exception && !std::isnan(input) && (input != rounded)) {

	3705 FPProcessException();

	3706 }

	3707 dst.SetFloat<float>(i, rounded);

	3708 }

	3709 } else {

	3710 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3711 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3712 double input = src.Float<double>(i);

	3713 double rounded = FPRoundInt(input, rounding_mode);

	3714 if (inexact_exception && !std::isnan(input) && (input != rounded)) {

	3715 FPProcessException();

	3716 }

	3717 dst.SetFloat<double>(i, rounded);

	3718 }

	3719 }

	3720 return dst;

	3721 }

	3722

	3723 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,

	3724 const LogicVRegister& src,

	3725 FPRounding rounding_mode, int fbits) {

	3726 dst.ClearForWrite(vform);

	3727 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3728 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3729 float op = src.Float<float>(i) * std::pow(2.0f, fbits);

	3730 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));

	3731 }

	3732 } else {

	3733 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3734 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3735 double op = src.Float<double>(i) * std::pow(2.0, fbits);

	3736 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));

	3737 }

	3738 }

	3739 return dst;

	3740 }

	3741

	3742 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,

	3743 const LogicVRegister& src,

	3744 FPRounding rounding_mode, int fbits) {

	3745 dst.ClearForWrite(vform);

	3746 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3747 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3748 float op = src.Float<float>(i) * std::pow(2.0f, fbits);

	3749 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));

	3750 }

	3751 } else {

	3752 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3753 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3754 double op = src.Float<double>(i) * std::pow(2.0, fbits);

	3755 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));

	3756 }

	3757 }

	3758 return dst;

	3759 }

	3760

	3761 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,

	3762 const LogicVRegister& src) {

	3763 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3764 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {

	3765 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));

	3766 }

	3767 } else {

	3768 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3769 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {

	3770 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));

	3771 }

	3772 }

	3773 return dst;

	3774 }

	3775

	3776 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,

	3777 const LogicVRegister& src) {

	3778 int lane_count = LaneCountFromFormat(vform);

	3779 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3780 for (int i = 0; i < lane_count; i++) {

	3781 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));

	3782 }

	3783 } else {

	3784 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3785 for (int i = 0; i < lane_count; i++) {

	3786 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));

	3787 }

	3788 }

	3789 return dst;

	3790 }

	3791

	3792 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,

	3793 const LogicVRegister& src) {

	3794 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {

	3795 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3796 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));

	3797 }

	3798 } else {

	3799 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3800 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3801 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));

	3802 }

	3803 }

	3804 return dst;

	3805 }

	3806

	3807 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,

	3808 const LogicVRegister& src) {

	3809 int lane_count = LaneCountFromFormat(vform) / 2;

	3810 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {

	3811 for (int i = lane_count - 1; i >= 0; i--) {

	3812 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));

	3813 }

	3814 } else {

	3815 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3816 for (int i = lane_count - 1; i >= 0; i--) {

	3817 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));

	3818 }

	3819 }

	3820 return dst;

	3821 }

	3822

	3823 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,

	3824 const LogicVRegister& src) {

	3825 dst.ClearForWrite(vform);

	3826 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3827 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3828 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));

	3829 }

	3830 return dst;

	3831 }

	3832

	3833 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,

	3834 const LogicVRegister& src) {

	3835 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3836 int lane_count = LaneCountFromFormat(vform) / 2;

	3837 for (int i = lane_count - 1; i >= 0; i--) {

	3838 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));

	3839 }

	3840 return dst;

	3841 }

	3842

	3843 // Based on reference C function recip_sqrt_estimate from ARM ARM.

	3844 double Simulator::recip_sqrt_estimate(double a) {

	3845 int q0, q1, s;

	3846 double r;

	3847 if (a < 0.5) {

	3848 q0 = static_cast<int>(a * 512.0);

	3849 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);

	3850 } else {

	3851 q1 = static_cast<int>(a * 256.0);

	3852 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);

	3853 }

	3854 s = static_cast<int>(256.0 * r + 0.5);

	3855 return static_cast<double>(s) / 256.0;

	3856 }

	3857

	3858 namespace {

	3859

	3860 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {

	3861 return unsigned_bitextract_64(start_bit, end_bit, val);

	3862 }

	3863

	3864 } // anonymous namespace

	3865

	3866 template <typename T>

	3867 T Simulator::FPRecipSqrtEstimate(T op) {

	3868 static_assert(std::is_same<float, T>::value \|\| std::is_same<double, T>::value,

	3869 "T must be a float or double");

	3870

	3871 if (std::isnan(op)) {

	3872 return FPProcessNaN(op);

	3873 } else if (op == 0.0) {

	3874 if (copysign(1.0, op) < 0.0) {

	3875 return kFP64NegativeInfinity;

	3876 } else {

	3877 return kFP64PositiveInfinity;

	3878 }

	3879 } else if (copysign(1.0, op) < 0.0) {

	3880 FPProcessException();

	3881 return FPDefaultNaN<T>();

	3882 } else if (std::isinf(op)) {

	3883 return 0.0;

	3884 } else {

	3885 uint64_t fraction;

	3886 int32_t exp, result_exp;

	3887

	3888 if (sizeof(T) == sizeof(float)) {

	3889 exp = static_cast<int32_t>(float_exp(op));

	3890 fraction = float_mantissa(op);

	3891 fraction <<= 29;

	3892 } else {

	3893 exp = static_cast<int32_t>(double_exp(op));

	3894 fraction = double_mantissa(op);

	3895 }

	3896

	3897 if (exp == 0) {

	3898 while (Bits(fraction, 51, 51) == 0) {

	3899 fraction = Bits(fraction, 50, 0) << 1;

	3900 exp -= 1;

	3901 }

	3902 fraction = Bits(fraction, 50, 0) << 1;

	3903 }

	3904

	3905 double scaled;

	3906 if (Bits(exp, 0, 0) == 0) {

	3907 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);

	3908 } else {

	3909 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);

	3910 }

	3911

	3912 if (sizeof(T) == sizeof(float)) {

	3913 result_exp = (380 - exp) / 2;

	3914 } else {

	3915 result_exp = (3068 - exp) / 2;

	3916 }

	3917

	3918 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));

	3919

	3920 if (sizeof(T) == sizeof(float)) {

	3921 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));

	3922 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));

	3923 return float_pack(0, exp_bits, est_bits);

	3924 } else {

	3925 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));

	3926 }

	3927 }

	3928 }

	3929

	3930 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,

	3931 const LogicVRegister& src) {

	3932 dst.ClearForWrite(vform);

	3933 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3934 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3935 float input = src.Float<float>(i);

	3936 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));

	3937 }

	3938 } else {

	3939 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3940 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3941 double input = src.Float<double>(i);

	3942 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));

	3943 }

	3944 }

	3945 return dst;

	3946 }

	3947

	3948 template <typename T>

	3949 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {

	3950 static_assert(std::is_same<float, T>::value \|\| std::is_same<double, T>::value,

	3951 "T must be a float or double");

	3952 uint32_t sign;

	3953

	3954 if (sizeof(T) == sizeof(float)) {

	3955 sign = float_sign(op);

	3956 } else {

	3957 sign = double_sign(op);

	3958 }

	3959

	3960 if (std::isnan(op)) {

	3961 return FPProcessNaN(op);

	3962 } else if (std::isinf(op)) {

	3963 return (sign == 1) ? -0.0 : 0.0;

	3964 } else if (op == 0.0) {

	3965 FPProcessException(); // FPExc_DivideByZero exception.

	3966 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;

	3967 } else if (((sizeof(T) == sizeof(float)) &&

	3968 (std::fabs(op) < std::pow(2.0, -128.0))) \|\|

	3969 ((sizeof(T) == sizeof(double)) &&

	3970 (std::fabs(op) < std::pow(2.0, -1024.0)))) {

	3971 bool overflow_to_inf = false;

	3972 switch (rounding) {

	3973 case FPTieEven:

	3974 overflow_to_inf = true;

	3975 break;

	3976 case FPPositiveInfinity:

	3977 overflow_to_inf = (sign == 0);

	3978 break;

	3979 case FPNegativeInfinity:

	3980 overflow_to_inf = (sign == 1);

	3981 break;

	3982 case FPZero:

	3983 overflow_to_inf = false;

	3984 break;

	3985 default:

	3986 break;

	3987 }

	3988 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.

	3989 if (overflow_to_inf) {

	3990 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;

	3991 } else {

	3992 // Return FPMaxNormal(sign).

	3993 if (sizeof(T) == sizeof(float)) {

	3994 return float_pack(sign, 0xfe, 0x07fffff);

	3995 } else {

	3996 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);

	3997 }

	3998 }

	3999 } else {

	4000 uint64_t fraction;

	4001 int32_t exp, result_exp;

	4002 uint32_t sign;

	4003

	4004 if (sizeof(T) == sizeof(float)) {

	4005 sign = float_sign(op);

	4006 exp = static_cast<int32_t>(float_exp(op));

	4007 fraction = float_mantissa(op);

	4008 fraction <<= 29;

	4009 } else {

	4010 sign = double_sign(op);

	4011 exp = static_cast<int32_t>(double_exp(op));

	4012 fraction = double_mantissa(op);

	4013 }

	4014

	4015 if (exp == 0) {

	4016 if (Bits(fraction, 51, 51) == 0) {

	4017 exp -= 1;

	4018 fraction = Bits(fraction, 49, 0) << 2;

	4019 } else {

	4020 fraction = Bits(fraction, 50, 0) << 1;

	4021 }

	4022 }

	4023

	4024 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);

	4025

	4026 if (sizeof(T) == sizeof(float)) {

	4027 result_exp = 253 - exp;

	4028 } else {

	4029 result_exp = 2045 - exp;

	4030 }

	4031

	4032 double estimate = recip_estimate(scaled);

	4033

	4034 fraction = double_mantissa(estimate);

	4035 if (result_exp == 0) {

	4036 fraction = (UINT64_C(1) << 51) \| Bits(fraction, 51, 1);

	4037 } else if (result_exp == -1) {

	4038 fraction = (UINT64_C(1) << 50) \| Bits(fraction, 51, 2);

	4039 result_exp = 0;

	4040 }

	4041 if (sizeof(T) == sizeof(float)) {

	4042 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));

	4043 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));

	4044 return float_pack(sign, exp_bits, frac_bits);

	4045 } else {

	4046 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));

	4047 }

	4048 }

	4049 }

	4050

	4051 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,

	4052 const LogicVRegister& src, FPRounding round) {

	4053 dst.ClearForWrite(vform);

	4054 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4055 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4056 float input = src.Float<float>(i);

	4057 dst.SetFloat(i, FPRecipEstimate<float>(input, round));

	4058 }

	4059 } else {

	4060 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4061 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4062 double input = src.Float<double>(i);

	4063 dst.SetFloat(i, FPRecipEstimate<double>(input, round));

	4064 }

	4065 }

	4066 return dst;

	4067 }

	4068

	4069 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,

	4070 const LogicVRegister& src) {

	4071 dst.ClearForWrite(vform);

	4072 uint64_t operand;

	4073 uint32_t result;

	4074 double dp_operand, dp_result;

	4075 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4076 operand = src.Uint(vform, i);

	4077 if (operand <= 0x3FFFFFFF) {

	4078 result = 0xFFFFFFFF;

	4079 } else {

	4080 dp_operand = operand * std::pow(2.0, -32);

	4081 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);

	4082 result = static_cast<uint32_t>(dp_result);

	4083 }

	4084 dst.SetUint(vform, i, result);

	4085 }

	4086 return dst;

	4087 }

	4088

	4089 // Based on reference C function recip_estimate from ARM ARM.

	4090 double Simulator::recip_estimate(double a) {

	4091 int q, s;

	4092 double r;

	4093 q = static_cast<int>(a * 512.0);

	4094 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);

	4095 s = static_cast<int>(256.0 * r + 0.5);

	4096 return static_cast<double>(s) / 256.0;

	4097 }

	4098

	4099 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,

	4100 const LogicVRegister& src) {

	4101 dst.ClearForWrite(vform);

	4102 uint64_t operand;

	4103 uint32_t result;

	4104 double dp_operand, dp_result;

	4105 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4106 operand = src.Uint(vform, i);

	4107 if (operand <= 0x7FFFFFFF) {

	4108 result = 0xFFFFFFFF;

	4109 } else {

	4110 dp_operand = operand * std::pow(2.0, -32);

	4111 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);

	4112 result = static_cast<uint32_t>(dp_result);

	4113 }

	4114 dst.SetUint(vform, i, result);

	4115 }

	4116 return dst;

	4117 }

	4118

	4119 template <typename T>

	4120 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,

	4121 const LogicVRegister& src) {

	4122 dst.ClearForWrite(vform);

	4123 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4124 T op = src.Float<T>(i);

	4125 T result;

	4126 if (std::isnan(op)) {

	4127 result = FPProcessNaN(op);

	4128 } else {

	4129 int exp;

	4130 uint32_t sign;

	4131 if (sizeof(T) == sizeof(float)) {

	4132 sign = float_sign(op);

	4133 exp = static_cast<int>(float_exp(op));

	4134 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));

	4135 result = float_pack(sign, exp, 0);

	4136 } else {

	4137 sign = double_sign(op);

	4138 exp = static_cast<int>(double_exp(op));

	4139 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));

	4140 result = double_pack(sign, exp, 0);

	4141 }

	4142 }

	4143 dst.SetFloat(i, result);

	4144 }

	4145 return dst;

	4146 }

	4147

	4148 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,

	4149 const LogicVRegister& src) {

	4150 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4151 frecpx<float>(vform, dst, src);

	4152 } else {

	4153 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4154 frecpx<double>(vform, dst, src);

	4155 }

	4156 return dst;

	4157 }

	4158

	4159 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,

	4160 const LogicVRegister& src, int fbits,

	4161 FPRounding round) {

	4162 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4163 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4164 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);

	4165 dst.SetFloat<float>(i, result);

	4166 } else {

	4167 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4168 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);

	4169 dst.SetFloat<double>(i, result);

	4170 }

	4171 }

	4172 return dst;

	4173 }

	4174

	4175 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,

	4176 const LogicVRegister& src, int fbits,

	4177 FPRounding round) {

	4178 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4179 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4180 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);

	4181 dst.SetFloat<float>(i, result);

	4182 } else {

	4183 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4184 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);

	4185 dst.SetFloat<double>(i, result);

	4186 }

	4187 }

	4188 return dst;

	4189 }

	4190

	4191 #endif // USE_SIMULATOR

	4192

	4193 } // namespace internal

	4194 } // namespace v8

	4195

	4196 #endif // V8_TARGET_ARCH_ARM64

OLD	NEW

« src/arm64/simulator-arm64.h ('K') | « src/arm64/simulator-arm64.cc ('k') | src/arm64/utils-arm64.h » ('j') | test/cctest/cctest.cc » ('J')