src/arm64/logic-arm64.cc - Issue 2622643005: ARM64: Add NEON support

Side by Side Diff: src/arm64/logic-arm64.cc

Issue 2622643005: ARM64: Add NEON support (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright 2016 the V8 project authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #if V8_TARGET_ARCH_ARM64

	6

	7 #include <cmath>

	8 #include "src/arm64/simulator-arm64.h"
	bbudge 2017/01/31 01:41:32: Did you pull this out of simulator-arm64.cc because of file size? Perhaps the file name should reflect this and cause the files to be grouped together alphabetically, e.g. simulator-neon-arm64.cc, simulator-logic-arm64.cc.
	martyn.capewell 2017/02/03 11:01:31: This file contains the more complicated simulator operations, some of which were already defined for floating point instructions, but needed to be iterated over lanes for NEON. I think these were originally in a separate class, but they needed to share too much information with the Simulator, so they returned to the Simulator, but stayed in this new file, as there was still a conceptual gap. I can rename this file simulator-logic-arm64.cc, if that works?
	bbudge 2017/02/08 01:39:11: That's fine.
	martyn.capewell 2017/02/15 11:51:00: Done.
	9

	10 namespace v8 {

	11 namespace internal {

	12

	13 #if defined(USE_SIMULATOR)

	14

	15 template <>

	16 double Simulator::FPDefaultNaN<double>() {

	17 return kFP64DefaultNaN;

	18 }
	bbudge 2017/01/31 01:41:32: Why not define these inline in the header, simulator-arm64.h, since they're used there?
	martyn.capewell 2017/02/03 11:01:31: Done.
	19

	20 template <>

	21 float Simulator::FPDefaultNaN<float>() {

	22 return kFP32DefaultNaN;

	23 }

	24

	25 namespace {

	26

	27 // See FPRound for a description of this function.

	28 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,

	29 FPRounding round_mode) {

	30 uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(

	31 sign, exponent, mantissa, round_mode);

	32 return bit_cast<double>(bits);

	33 }

	34

	35 // See FPRound for a description of this function.

	36 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,

	37 FPRounding round_mode) {

	38 uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(

	39 sign, exponent, mantissa, round_mode);

	40 return bit_cast<float>(bits);

	41 }

	42

	43 // See FPRound for a description of this function.

	44 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,

	45 uint64_t mantissa, FPRounding round_mode) {

	46 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(

	47 sign, exponent, mantissa, round_mode);

	48 }

	49

	50 } // anonymous namespace
	bbudge 2017/01/31 01:41:32: nit: just 'namespace'
	martyn.capewell 2017/02/03 11:01:31: Done.
	51

	52 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {

	53 if (src >= 0) {

	54 return UFixedToDouble(src, fbits, round);

	55 } else if (src == INT64_MIN) {

	56 return -UFixedToDouble(src, fbits, round);

	57 } else {

	58 return -UFixedToDouble(-src, fbits, round);

	59 }

	60 }

	61

	62 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {

	63 // An input of 0 is a special case because the result is effectively

	64 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.

	65 if (src == 0) {

	66 return 0.0;

	67 }

	68

	69 // Calculate the exponent. The highest significant bit will have the value

	70 // 2^exponent.

	71 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);

	72 const int64_t exponent = highest_significant_bit - fbits;

	73

	74 return FPRoundToDouble(0, exponent, src, round);

	75 }

	76

	77 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {

	78 if (src >= 0) {

	79 return UFixedToFloat(src, fbits, round);

	80 } else if (src == INT64_MIN) {

	81 return -UFixedToFloat(src, fbits, round);

	82 } else {

	83 return -UFixedToFloat(-src, fbits, round);

	84 }

	85 }

	86

	87 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {

	88 // An input of 0 is a special case because the result is effectively

	89 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.

	90 if (src == 0) {

	91 return 0.0f;

	92 }

	93

	94 // Calculate the exponent. The highest significant bit will have the value

	95 // 2^exponent.

	96 const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);

	97 const int32_t exponent = highest_significant_bit - fbits;

	98

	99 return FPRoundToFloat(0, exponent, src, round);

	100 }

	101

	102 double Simulator::FPToDouble(float value) {

	103 switch (std::fpclassify(value)) {

	104 case FP_NAN: {

	105 if (IsSignallingNaN(value)) {

	106 FPProcessException();

	107 }

	108 if (DN()) return kFP64DefaultNaN;

	109

	110 // Convert NaNs as the processor would:

	111 // - The sign is propagated.

	112 // - The payload (mantissa) is transferred entirely, except that the top
	bbudge 2017/01/31 01:41:32 s/payload/mantissa ? s/payload/mantissa ? martyn.capewell 2017/02/03 11:01:31 Not sure what you mean here. In the context of a N Show quoted text On 2017/01/31 01:41:32, bbudge wrote: > s/payload/mantissa ? Not sure what you mean here. In the context of a NaN, it's called a "payload", as it has no numerical meaning. It just happens to occupy the same bits as a mantissa would were the representation a number. bbudge 2017/02/08 01:39:11 For consistency. It looks like 'mantissa' is used Show quoted text On 2017/02/03 11:01:31, martyn.capewell wrote: > On 2017/01/31 01:41:32, bbudge wrote: > > s/payload/mantissa ? > > Not sure what you mean here. In the context of a NaN, it's called a "payload", > as it has no numerical meaning. It just happens to occupy the same bits as a > mantissa would were the representation a number. For consistency. It looks like 'mantissa' is used in the other conversion functions here. martyn.capewell 2017/02/15 11:51:00 Done. Show quoted text On 2017/02/08 01:39:11, bbudge wrote: > On 2017/02/03 11:01:31, martyn.capewell wrote: > > On 2017/01/31 01:41:32, bbudge wrote: > > > s/payload/mantissa ? > > > > Not sure what you mean here. In the context of a NaN, it's called a "payload", > > as it has no numerical meaning. It just happens to occupy the same bits as a > > mantissa would were the representation a number. > > For consistency. It looks like 'mantissa' is used in the other conversion > functions here. Done.
	113 // bit is forced to '1', making the result a quiet NaN. The unused

	114 // (low-order) payload bits are set to 0.

	115 uint32_t raw = bit_cast<uint32_t>(value);

	116

	117 uint64_t sign = raw >> 31;

	118 uint64_t exponent = (1 << kDoubleExponentBits) - 1;

	119 uint64_t payload = unsigned_bitextract_64(21, 0, raw);

	120

	121 // Unused low-order bits remain zero.

	122 payload <<= (kDoubleMantissaBits - kFloatMantissaBits);

	123

	124 // Force a quiet NaN.

	125 payload \|= (UINT64_C(1) << (kDoubleMantissaBits - 1));

	126

	127 return double_pack(sign, exponent, payload);

	128 }

	129

	130 case FP_ZERO:

	131 case FP_NORMAL:

	132 case FP_SUBNORMAL:

	133 case FP_INFINITE: {

	134 // All other inputs are preserved in a standard cast, because every value

	135 // representable using an IEEE-754 float is also representable using an

	136 // IEEE-754 double.

	137 return static_cast<double>(value);

	138 }

	139 }

	140

	141 UNREACHABLE();

	142 return kFP64DefaultNaN;

	143 }

	144

	145 float Simulator::FPToFloat(float16 value) {

	146 uint32_t sign = value >> 15;

	147 uint32_t exponent =

	148 unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,

	149 kFloat16MantissaBits, value);

	150 uint32_t mantissa =

	151 unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);

	152

	153 switch (float16classify(value)) {

	154 case FP_ZERO:

	155 return (sign == 0) ? 0.0f : -0.0f;

	156

	157 case FP_INFINITE:

	158 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;

	159

	160 case FP_SUBNORMAL: {

	161 // Calculate shift required to put mantissa into the most-significant bits

	162 // of the destination mantissa.

	163 int shift = CountLeadingZeros(mantissa << (32 - 10), 32);

	164

	165 // Shift mantissa and discard implicit '1'.

	166 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;

	167 mantissa &= (1 << kFloatMantissaBits) - 1;

	168

	169 // Adjust the exponent for the shift applied, and rebias.

	170 exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);

	171 break;

	172 }

	173

	174 case FP_NAN: {

	175 if (IsSignallingNaN(value)) {

	176 FPProcessException();

	177 }

	178 if (DN()) return kFP32DefaultNaN;

	179

	180 // Convert NaNs as the processor would:

	181 // - The sign is propagated.

	182 // - The payload (mantissa) is transferred entirely, except that the top

	183 // bit is forced to '1', making the result a quiet NaN. The unused

	184 // (low-order) payload bits are set to 0.

	185 exponent = (1 << kFloatExponentBits) - 1;

	186

	187 // Increase bits in mantissa, making low-order bits 0.

	188 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

	189 mantissa \|= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.

	190 break;

	191 }

	192

	193 case FP_NORMAL: {

	194 // Increase bits in mantissa, making low-order bits 0.

	195 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

	196

	197 // Change exponent bias.

	198 exponent += (kFloatExponentBias - kFloat16ExponentBias);

	199 break;

	200 }

	201

	202 default:

	203 UNREACHABLE();

	204 return kFP32DefaultNaN;

	205 }

	206 return float_pack(sign, exponent, mantissa);

	207 }

	208

	209 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {

	210 // Only the FPTieEven rounding mode is implemented.

	211 DCHECK_EQ(round_mode, FPTieEven);

	212 USE(round_mode);

	213

	214 int64_t sign = float_sign(value);

	215 int64_t exponent =

	216 static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;

	217 uint32_t mantissa = float_mantissa(value);

	218

	219 switch (std::fpclassify(value)) {

	220 case FP_NAN: {

	221 if (IsSignallingNaN(value)) {

	222 FPProcessException();

	223 }

	224 if (DN()) return kFP16DefaultNaN;

	225

	226 // Convert NaNs as the processor would:

	227 // - The sign is propagated.

	228 // - The payload (mantissa) is transferred as much as possible, except

	229 // that the top bit is forced to '1', making the result a quiet NaN.

	230 float16 result =

	231 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	232 result \|= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);

	233 result \|= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;

	234 return result;

	235 }

	236

	237 case FP_ZERO:

	238 return (sign == 0) ? 0 : 0x8000;

	239

	240 case FP_INFINITE:

	241 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	242

	243 case FP_NORMAL:

	244 case FP_SUBNORMAL: {

	245 // Convert float-to-half as the processor would, assuming that FPCR.FZ

	246 // (flush-to-zero) is not set.

	247

	248 // Add the implicit '1' bit to the mantissa.

	249 mantissa += (1 << kFloatMantissaBits);

	250 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);

	251 }

	252 }

	253

	254 UNREACHABLE();

	255 return kFP16DefaultNaN;

	256 }

	257

	258 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {

	259 // Only the FPTieEven rounding mode is implemented.

	260 DCHECK_EQ(round_mode, FPTieEven);

	261 USE(round_mode);

	262

	263 int64_t sign = double_sign(value);

	264 int64_t exponent =

	265 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;

	266 uint64_t mantissa = double_mantissa(value);

	267

	268 switch (std::fpclassify(value)) {

	269 case FP_NAN: {

	270 if (IsSignallingNaN(value)) {

	271 FPProcessException();

	272 }

	273 if (DN()) return kFP16DefaultNaN;

	274

	275 // Convert NaNs as the processor would:

	276 // - The sign is propagated.

	277 // - The payload (mantissa) is transferred as much as possible, except

	278 // that the top bit is forced to '1', making the result a quiet NaN.

	279 float16 result =

	280 (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	281 result \|= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);

	282 result \|= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;

	283 return result;

	284 }

	285

	286 case FP_ZERO:

	287 return (sign == 0) ? 0 : 0x8000;

	288

	289 case FP_INFINITE:

	290 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

	291

	292 case FP_NORMAL:

	293 case FP_SUBNORMAL: {

	294 // Convert double-to-half as the processor would, assuming that FPCR.FZ

	295 // (flush-to-zero) is not set.

	296

	297 // Add the implicit '1' bit to the mantissa.

	298 mantissa += (UINT64_C(1) << kDoubleMantissaBits);

	299 return FPRoundToFloat16(sign, exponent, mantissa, round_mode);

	300 }

	301 }

	302

	303 UNREACHABLE();

	304 return kFP16DefaultNaN;

	305 }

	306

	307 float Simulator::FPToFloat(double value, FPRounding round_mode) {

	308 // Only the FPTieEven rounding mode is implemented.

	309 DCHECK((round_mode == FPTieEven) \|\| (round_mode == FPRoundOdd));

	310 USE(round_mode);

	311

	312 switch (std::fpclassify(value)) {

	313 case FP_NAN: {

	314 if (IsSignallingNaN(value)) {

	315 FPProcessException();

	316 }

	317 if (DN()) return kFP32DefaultNaN;

	318

	319 // Convert NaNs as the processor would:

	320 // - The sign is propagated.

	321 // - The payload (mantissa) is transferred as much as possible, except

	322 // that the top bit is forced to '1', making the result a quiet NaN.
	bbudge 2017/01/31 01:41:32 s/payload/mantissa s/payload/mantissa martyn.capewell 2017/02/15 11:51:02 Done. Show quoted text On 2017/01/31 01:41:32, bbudge wrote: > s/payload/mantissa Done.
	323

	324 uint64_t raw = bit_cast<uint64_t>(value);

	325

	326 uint32_t sign = raw >> 63;

	327 uint32_t exponent = (1 << 8) - 1;

	328 uint32_t payload = static_cast<uint32_t>(unsigned_bitextract_64(

	329 50, kDoubleMantissaBits - kFloatMantissaBits, raw));

	330 payload \|= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.

	331

	332 return float_pack(sign, exponent, payload);

	333 }

	334

	335 case FP_ZERO:

	336 case FP_INFINITE: {

	337 // In a C++ cast, any value representable in the target type will be

	338 // unchanged. This is always the case for +/-0.0 and infinities.

	339 return static_cast<float>(value);

	340 }

	341

	342 case FP_NORMAL:

	343 case FP_SUBNORMAL: {

	344 // Convert double-to-float as the processor would, assuming that FPCR.FZ

	345 // (flush-to-zero) is not set.

	346 uint32_t sign = double_sign(value);

	347 int64_t exponent =

	348 static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;

	349 uint64_t mantissa = double_mantissa(value);

	350 if (std::fpclassify(value) == FP_NORMAL) {

	351 // For normal FP values, add the hidden bit.

	352 mantissa \|= (UINT64_C(1) << kDoubleMantissaBits);

	353 }

	354 return FPRoundToFloat(sign, exponent, mantissa, round_mode);

	355 }

	356 }

	357

	358 UNREACHABLE();

	359 return kFP32DefaultNaN;

	360 }

	361

	362 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {

	363 dst.ClearForWrite(vform);

	364 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	365 dst.ReadUintFromMem(vform, i, addr);

	366 addr += LaneSizeInBytesFromFormat(vform);

	367 }

	368 }

	369

	370 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,

	371 uint64_t addr) {

	372 dst.ReadUintFromMem(vform, index, addr);

	373 }

	374

	375 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {

	376 dst.ClearForWrite(vform);

	377 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	378 dst.ReadUintFromMem(vform, i, addr);

	379 }

	380 }

	381

	382 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,

	383 LogicVRegister dst2, uint64_t addr1) {

	384 dst1.ClearForWrite(vform);

	385 dst2.ClearForWrite(vform);

	386 int esize = LaneSizeInBytesFromFormat(vform);

	387 uint64_t addr2 = addr1 + esize;

	388 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	389 dst1.ReadUintFromMem(vform, i, addr1);

	390 dst2.ReadUintFromMem(vform, i, addr2);

	391 addr1 += 2 * esize;

	392 addr2 += 2 * esize;

	393 }

	394 }

	395

	396 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,

	397 LogicVRegister dst2, int index, uint64_t addr1) {

	398 dst1.ClearForWrite(vform);

	399 dst2.ClearForWrite(vform);

	400 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	401 dst1.ReadUintFromMem(vform, index, addr1);

	402 dst2.ReadUintFromMem(vform, index, addr2);

	403 }

	404

	405 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,

	406 LogicVRegister dst2, uint64_t addr) {

	407 dst1.ClearForWrite(vform);

	408 dst2.ClearForWrite(vform);

	409 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	410 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	411 dst1.ReadUintFromMem(vform, i, addr);

	412 dst2.ReadUintFromMem(vform, i, addr2);

	413 }

	414 }

	415

	416 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,

	417 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {

	418 dst1.ClearForWrite(vform);

	419 dst2.ClearForWrite(vform);

	420 dst3.ClearForWrite(vform);

	421 int esize = LaneSizeInBytesFromFormat(vform);

	422 uint64_t addr2 = addr1 + esize;

	423 uint64_t addr3 = addr2 + esize;

	424 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	425 dst1.ReadUintFromMem(vform, i, addr1);

	426 dst2.ReadUintFromMem(vform, i, addr2);

	427 dst3.ReadUintFromMem(vform, i, addr3);

	428 addr1 += 3 * esize;

	429 addr2 += 3 * esize;

	430 addr3 += 3 * esize;

	431 }

	432 }

	433

	434 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,

	435 LogicVRegister dst2, LogicVRegister dst3, int index,

	436 uint64_t addr1) {

	437 dst1.ClearForWrite(vform);

	438 dst2.ClearForWrite(vform);

	439 dst3.ClearForWrite(vform);

	440 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	441 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	442 dst1.ReadUintFromMem(vform, index, addr1);

	443 dst2.ReadUintFromMem(vform, index, addr2);

	444 dst3.ReadUintFromMem(vform, index, addr3);

	445 }

	446

	447 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,

	448 LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {

	449 dst1.ClearForWrite(vform);

	450 dst2.ClearForWrite(vform);

	451 dst3.ClearForWrite(vform);

	452 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	453 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	454 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	455 dst1.ReadUintFromMem(vform, i, addr);

	456 dst2.ReadUintFromMem(vform, i, addr2);

	457 dst3.ReadUintFromMem(vform, i, addr3);

	458 }

	459 }

	460

	461 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,

	462 LogicVRegister dst2, LogicVRegister dst3,

	463 LogicVRegister dst4, uint64_t addr1) {

	464 dst1.ClearForWrite(vform);

	465 dst2.ClearForWrite(vform);

	466 dst3.ClearForWrite(vform);

	467 dst4.ClearForWrite(vform);

	468 int esize = LaneSizeInBytesFromFormat(vform);

	469 uint64_t addr2 = addr1 + esize;

	470 uint64_t addr3 = addr2 + esize;

	471 uint64_t addr4 = addr3 + esize;

	472 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	473 dst1.ReadUintFromMem(vform, i, addr1);

	474 dst2.ReadUintFromMem(vform, i, addr2);

	475 dst3.ReadUintFromMem(vform, i, addr3);

	476 dst4.ReadUintFromMem(vform, i, addr4);

	477 addr1 += 4 * esize;

	478 addr2 += 4 * esize;

	479 addr3 += 4 * esize;

	480 addr4 += 4 * esize;

	481 }

	482 }

	483

	484 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,

	485 LogicVRegister dst2, LogicVRegister dst3,

	486 LogicVRegister dst4, int index, uint64_t addr1) {

	487 dst1.ClearForWrite(vform);

	488 dst2.ClearForWrite(vform);

	489 dst3.ClearForWrite(vform);

	490 dst4.ClearForWrite(vform);

	491 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);

	492 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	493 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);

	494 dst1.ReadUintFromMem(vform, index, addr1);

	495 dst2.ReadUintFromMem(vform, index, addr2);

	496 dst3.ReadUintFromMem(vform, index, addr3);

	497 dst4.ReadUintFromMem(vform, index, addr4);

	498 }

	499

	500 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,

	501 LogicVRegister dst2, LogicVRegister dst3,

	502 LogicVRegister dst4, uint64_t addr) {

	503 dst1.ClearForWrite(vform);

	504 dst2.ClearForWrite(vform);

	505 dst3.ClearForWrite(vform);

	506 dst4.ClearForWrite(vform);

	507 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);

	508 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);

	509 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);

	510 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	511 dst1.ReadUintFromMem(vform, i, addr);

	512 dst2.ReadUintFromMem(vform, i, addr2);

	513 dst3.ReadUintFromMem(vform, i, addr3);

	514 dst4.ReadUintFromMem(vform, i, addr4);

	515 }

	516 }

	517

	518 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {

	519 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	520 src.WriteUintToMem(vform, i, addr);

	521 addr += LaneSizeInBytesFromFormat(vform);

	522 }

	523 }

	524

	525 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,

	526 uint64_t addr) {

	527 src.WriteUintToMem(vform, index, addr);

	528 }

	529

	530 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	531 uint64_t addr) {

	532 int esize = LaneSizeInBytesFromFormat(vform);

	533 uint64_t addr2 = addr + esize;

	534 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	535 dst.WriteUintToMem(vform, i, addr);

	536 dst2.WriteUintToMem(vform, i, addr2);

	537 addr += 2 * esize;

	538 addr2 += 2 * esize;

	539 }

	540 }

	541

	542 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	543 int index, uint64_t addr) {

	544 int esize = LaneSizeInBytesFromFormat(vform);

	545 dst.WriteUintToMem(vform, index, addr);

	546 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	547 }

	548

	549 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	550 LogicVRegister dst3, uint64_t addr) {

	551 int esize = LaneSizeInBytesFromFormat(vform);

	552 uint64_t addr2 = addr + esize;

	553 uint64_t addr3 = addr2 + esize;

	554 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	555 dst.WriteUintToMem(vform, i, addr);

	556 dst2.WriteUintToMem(vform, i, addr2);

	557 dst3.WriteUintToMem(vform, i, addr3);

	558 addr += 3 * esize;

	559 addr2 += 3 * esize;

	560 addr3 += 3 * esize;

	561 }

	562 }

	563

	564 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	565 LogicVRegister dst3, int index, uint64_t addr) {

	566 int esize = LaneSizeInBytesFromFormat(vform);

	567 dst.WriteUintToMem(vform, index, addr);

	568 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	569 dst3.WriteUintToMem(vform, index, addr + 2 * esize);

	570 }

	571

	572 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	573 LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {

	574 int esize = LaneSizeInBytesFromFormat(vform);

	575 uint64_t addr2 = addr + esize;

	576 uint64_t addr3 = addr2 + esize;

	577 uint64_t addr4 = addr3 + esize;

	578 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	579 dst.WriteUintToMem(vform, i, addr);

	580 dst2.WriteUintToMem(vform, i, addr2);

	581 dst3.WriteUintToMem(vform, i, addr3);

	582 dst4.WriteUintToMem(vform, i, addr4);

	583 addr += 4 * esize;

	584 addr2 += 4 * esize;

	585 addr3 += 4 * esize;

	586 addr4 += 4 * esize;

	587 }

	588 }

	589

	590 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,

	591 LogicVRegister dst3, LogicVRegister dst4, int index,

	592 uint64_t addr) {

	593 int esize = LaneSizeInBytesFromFormat(vform);

	594 dst.WriteUintToMem(vform, index, addr);

	595 dst2.WriteUintToMem(vform, index, addr + 1 * esize);

	596 dst3.WriteUintToMem(vform, index, addr + 2 * esize);

	597 dst4.WriteUintToMem(vform, index, addr + 3 * esize);

	598 }

	599

	600 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,

	601 const LogicVRegister& src1,

	602 const LogicVRegister& src2, Condition cond) {

	603 dst.ClearForWrite(vform);

	604 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	605 int64_t sa = src1.Int(vform, i);

	606 int64_t sb = src2.Int(vform, i);

	607 uint64_t ua = src1.Uint(vform, i);

	608 uint64_t ub = src2.Uint(vform, i);

	609 bool result = false;

	610 switch (cond) {

	611 case eq:

	612 result = (ua == ub);

	613 break;

	614 case ge:

	615 result = (sa >= sb);

	616 break;

	617 case gt:

	618 result = (sa > sb);

	619 break;

	620 case hi:

	621 result = (ua > ub);

	622 break;

	623 case hs:

	624 result = (ua >= ub);

	625 break;

	626 case lt:

	627 result = (sa < sb);

	628 break;

	629 case le:

	630 result = (sa <= sb);

	631 break;

	632 default:

	633 UNREACHABLE();

	634 break;

	635 }

	636 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);

	637 }

	638 return dst;

	639 }

	640

	641 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,

	642 const LogicVRegister& src1, int imm,

	643 Condition cond) {

	644 SimVRegister temp;

	645 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);

	646 return cmp(vform, dst, src1, imm_reg, cond);

	647 }

	648

	649 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,

	650 const LogicVRegister& src1,

	651 const LogicVRegister& src2) {

	652 dst.ClearForWrite(vform);

	653 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	654 uint64_t ua = src1.Uint(vform, i);

	655 uint64_t ub = src2.Uint(vform, i);

	656 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);

	657 }

	658 return dst;

	659 }

	660

	661 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,

	662 const LogicVRegister& src1,

	663 const LogicVRegister& src2) {

	664 int lane_size = LaneSizeInBitsFromFormat(vform);

	665 dst.ClearForWrite(vform);

	666 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	667 // Test for unsigned saturation.

	668 uint64_t ua = src1.UintLeftJustified(vform, i);

	669 uint64_t ub = src2.UintLeftJustified(vform, i);

	670 uint64_t ur = ua + ub;

	671 if (ur < ua) {

	672 dst.SetUnsignedSat(i, true);

	673 }

	674

	675 // Test for signed saturation.

	676 bool pos_a = (ua >> 63) == 0;

	677 bool pos_b = (ub >> 63) == 0;

	678 bool pos_r = (ur >> 63) == 0;

	679 // If the signs of the operands are the same, but different from the result,

	680 // there was an overflow.

	681 if ((pos_a == pos_b) && (pos_a != pos_r)) {

	682 dst.SetSignedSat(i, pos_a);

	683 }

	684

	685 dst.SetInt(vform, i, ur >> (64 - lane_size));

	686 }

	687 return dst;

	688 }

	689

	690 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,

	691 const LogicVRegister& src1,

	692 const LogicVRegister& src2) {

	693 SimVRegister temp1, temp2;

	694 uzp1(vform, temp1, src1, src2);

	695 uzp2(vform, temp2, src1, src2);

	696 add(vform, dst, temp1, temp2);

	697 return dst;

	698 }

	699

	700 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,

	701 const LogicVRegister& src1,

	702 const LogicVRegister& src2) {

	703 SimVRegister temp;

	704 mul(vform, temp, src1, src2);

	705 add(vform, dst, dst, temp);

	706 return dst;

	707 }

	708

	709 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,

	710 const LogicVRegister& src1,

	711 const LogicVRegister& src2) {

	712 SimVRegister temp;

	713 mul(vform, temp, src1, src2);

	714 sub(vform, dst, dst, temp);

	715 return dst;

	716 }

	717

	718 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,

	719 const LogicVRegister& src1,

	720 const LogicVRegister& src2) {

	721 dst.ClearForWrite(vform);

	722 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	723 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));

	724 }

	725 return dst;

	726 }

	727

	728 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,

	729 const LogicVRegister& src1,

	730 const LogicVRegister& src2, int index) {

	731 SimVRegister temp;

	732 VectorFormat indexform = VectorFormatFillQ(vform);

	733 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));

	734 }

	735

	736 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,

	737 const LogicVRegister& src1,

	738 const LogicVRegister& src2, int index) {

	739 SimVRegister temp;

	740 VectorFormat indexform = VectorFormatFillQ(vform);

	741 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));

	742 }

	743

	744 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,

	745 const LogicVRegister& src1,

	746 const LogicVRegister& src2, int index) {

	747 SimVRegister temp;

	748 VectorFormat indexform = VectorFormatFillQ(vform);

	749 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));

	750 }

	751

	752 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,

	753 const LogicVRegister& src1,

	754 const LogicVRegister& src2, int index) {

	755 SimVRegister temp;

	756 VectorFormat indexform =

	757 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	758 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	759 }

	760

	761 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,

	762 const LogicVRegister& src1,

	763 const LogicVRegister& src2, int index) {

	764 SimVRegister temp;

	765 VectorFormat indexform =

	766 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	767 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	768 }

	769

	770 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,

	771 const LogicVRegister& src1,

	772 const LogicVRegister& src2, int index) {

	773 SimVRegister temp;

	774 VectorFormat indexform =

	775 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	776 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	777 }

	778

	779 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,

	780 const LogicVRegister& src1,

	781 const LogicVRegister& src2, int index) {

	782 SimVRegister temp;

	783 VectorFormat indexform =

	784 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	785 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	786 }

	787

	788 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,

	789 const LogicVRegister& src1,

	790 const LogicVRegister& src2, int index) {

	791 SimVRegister temp;

	792 VectorFormat indexform =

	793 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	794 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	795 }

	796

	797 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,

	798 const LogicVRegister& src1,

	799 const LogicVRegister& src2, int index) {

	800 SimVRegister temp;

	801 VectorFormat indexform =

	802 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	803 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	804 }

	805

	806 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,

	807 const LogicVRegister& src1,

	808 const LogicVRegister& src2, int index) {

	809 SimVRegister temp;

	810 VectorFormat indexform =

	811 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	812 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	813 }

	814

	815 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,

	816 const LogicVRegister& src1,

	817 const LogicVRegister& src2, int index) {

	818 SimVRegister temp;

	819 VectorFormat indexform =

	820 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	821 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	822 }

	823

	824 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,

	825 const LogicVRegister& src1,

	826 const LogicVRegister& src2, int index) {

	827 SimVRegister temp;

	828 VectorFormat indexform =

	829 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	830 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	831 }

	832

	833 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,

	834 const LogicVRegister& src1,

	835 const LogicVRegister& src2, int index) {

	836 SimVRegister temp;

	837 VectorFormat indexform =

	838 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	839 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	840 }

	841

	842 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,

	843 const LogicVRegister& src1,

	844 const LogicVRegister& src2, int index) {

	845 SimVRegister temp;

	846 VectorFormat indexform =

	847 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	848 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	849 }

	850

	851 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,

	852 const LogicVRegister& src1,

	853 const LogicVRegister& src2, int index) {

	854 SimVRegister temp;

	855 VectorFormat indexform =

	856 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	857 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	858 }

	859

	860 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,

	861 const LogicVRegister& src1,

	862 const LogicVRegister& src2, int index) {

	863 SimVRegister temp;

	864 VectorFormat indexform =

	865 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	866 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));

	867 }

	868

	869 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,

	870 const LogicVRegister& src1,

	871 const LogicVRegister& src2, int index) {

	872 SimVRegister temp;

	873 VectorFormat indexform =

	874 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	875 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	876 }

	877

	878 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,

	879 const LogicVRegister& src1,

	880 const LogicVRegister& src2, int index) {

	881 SimVRegister temp;

	882 VectorFormat indexform =

	883 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	884 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));

	885 }

	886

	887 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,

	888 const LogicVRegister& src1,

	889 const LogicVRegister& src2, int index) {

	890 SimVRegister temp;

	891 VectorFormat indexform =

	892 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	893 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	894 }

	895

	896 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,

	897 const LogicVRegister& src1,

	898 const LogicVRegister& src2, int index) {

	899 SimVRegister temp;

	900 VectorFormat indexform =

	901 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	902 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));

	903 }

	904

	905 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,

	906 const LogicVRegister& src1,

	907 const LogicVRegister& src2, int index) {

	908 SimVRegister temp;

	909 VectorFormat indexform =

	910 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));

	911 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));

	912 }

	913

	914 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,

	915 const LogicVRegister& src1,

	916 const LogicVRegister& src2, int index) {

	917 SimVRegister temp;

	918 VectorFormat indexform = VectorFormatFillQ(vform);

	919 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));

	920 }

	921

	922 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,

	923 const LogicVRegister& src1,

	924 const LogicVRegister& src2, int index) {

	925 SimVRegister temp;

	926 VectorFormat indexform = VectorFormatFillQ(vform);

	927 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));

	928 }

	929

	930 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {

	931 uint16_t result = 0;

	932 uint16_t extended_op2 = op2;

	933 for (int i = 0; i < 8; ++i) {

	934 if ((op1 >> i) & 1) {

	935 result = result ^ (extended_op2 << i);

	936 }

	937 }

	938 return result;

	939 }

	940

	941 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,

	942 const LogicVRegister& src1,

	943 const LogicVRegister& src2) {

	944 dst.ClearForWrite(vform);

	945 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	946 dst.SetUint(vform, i,

	947 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));

	948 }

	949 return dst;

	950 }

	951

	952 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,

	953 const LogicVRegister& src1,

	954 const LogicVRegister& src2) {

	955 VectorFormat vform_src = VectorFormatHalfWidth(vform);

	956 dst.ClearForWrite(vform);

	957 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	958 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),

	959 src2.Uint(vform_src, i)));

	960 }

	961 return dst;

	962 }

	963

	964 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,

	965 const LogicVRegister& src1,

	966 const LogicVRegister& src2) {

	967 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);

	968 dst.ClearForWrite(vform);

	969 int lane_count = LaneCountFromFormat(vform);

	970 for (int i = 0; i < lane_count; i++) {

	971 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),

	972 src2.Uint(vform_src, lane_count + i)));

	973 }

	974 return dst;

	975 }

	976

	977 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,

	978 const LogicVRegister& src1,

	979 const LogicVRegister& src2) {

	980 int lane_size = LaneSizeInBitsFromFormat(vform);

	981 dst.ClearForWrite(vform);

	982 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	983 // Test for unsigned saturation.

	984 uint64_t ua = src1.UintLeftJustified(vform, i);

	985 uint64_t ub = src2.UintLeftJustified(vform, i);

	986 uint64_t ur = ua - ub;

	987 if (ub > ua) {

	988 dst.SetUnsignedSat(i, false);

	989 }

	990

	991 // Test for signed saturation.

	992 bool pos_a = (ua >> 63) == 0;

	993 bool pos_b = (ub >> 63) == 0;

	994 bool pos_r = (ur >> 63) == 0;

	995 // If the signs of the operands are different, and the sign of the first

	996 // operand doesn't match the result, there was an overflow.

	997 if ((pos_a != pos_b) && (pos_a != pos_r)) {

	998 dst.SetSignedSat(i, pos_a);

	999 }

	1000

	1001 dst.SetInt(vform, i, ur >> (64 - lane_size));

	1002 }

	1003 return dst;

	1004 }

	1005

	1006 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,

	1007 const LogicVRegister& src1,

	1008 const LogicVRegister& src2) {

	1009 dst.ClearForWrite(vform);

	1010 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1011 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));

	1012 }

	1013 return dst;

	1014 }

	1015

	1016 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,

	1017 const LogicVRegister& src1,

	1018 const LogicVRegister& src2) {

	1019 dst.ClearForWrite(vform);

	1020 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1021 dst.SetUint(vform, i, src1.Uint(vform, i) \| src2.Uint(vform, i));

	1022 }

	1023 return dst;

	1024 }

	1025

	1026 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,

	1027 const LogicVRegister& src1,

	1028 const LogicVRegister& src2) {

	1029 dst.ClearForWrite(vform);

	1030 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1031 dst.SetUint(vform, i, src1.Uint(vform, i) \| ~src2.Uint(vform, i));

	1032 }

	1033 return dst;

	1034 }

	1035

	1036 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,

	1037 const LogicVRegister& src1,

	1038 const LogicVRegister& src2) {

	1039 dst.ClearForWrite(vform);

	1040 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1041 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));

	1042 }

	1043 return dst;

	1044 }

	1045

	1046 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,

	1047 const LogicVRegister& src1,

	1048 const LogicVRegister& src2) {

	1049 dst.ClearForWrite(vform);

	1050 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1051 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));

	1052 }

	1053 return dst;

	1054 }

	1055

	1056 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,

	1057 const LogicVRegister& src, uint64_t imm) {

	1058 uint64_t result[16];

	1059 int laneCount = LaneCountFromFormat(vform);

	1060 for (int i = 0; i < laneCount; ++i) {

	1061 result[i] = src.Uint(vform, i) & ~imm;

	1062 }

	1063 dst.SetUintArray(vform, result);

	1064 return dst;

	1065 }

	1066

	1067 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,

	1068 const LogicVRegister& src1,

	1069 const LogicVRegister& src2) {

	1070 dst.ClearForWrite(vform);

	1071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1072 uint64_t operand1 = dst.Uint(vform, i);

	1073 uint64_t operand2 = ~src2.Uint(vform, i);

	1074 uint64_t operand3 = src1.Uint(vform, i);

	1075 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1076 dst.SetUint(vform, i, result);

	1077 }

	1078 return dst;

	1079 }

	1080

	1081 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,

	1082 const LogicVRegister& src1,

	1083 const LogicVRegister& src2) {

	1084 dst.ClearForWrite(vform);

	1085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1086 uint64_t operand1 = dst.Uint(vform, i);

	1087 uint64_t operand2 = src2.Uint(vform, i);

	1088 uint64_t operand3 = src1.Uint(vform, i);

	1089 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1090 dst.SetUint(vform, i, result);

	1091 }

	1092 return dst;

	1093 }

	1094

	1095 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,

	1096 const LogicVRegister& src1,

	1097 const LogicVRegister& src2) {

	1098 dst.ClearForWrite(vform);

	1099 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1100 uint64_t operand1 = src2.Uint(vform, i);

	1101 uint64_t operand2 = dst.Uint(vform, i);

	1102 uint64_t operand3 = src1.Uint(vform, i);

	1103 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);

	1104 dst.SetUint(vform, i, result);

	1105 }

	1106 return dst;

	1107 }

	1108

	1109 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,

	1110 const LogicVRegister& src1,

	1111 const LogicVRegister& src2, bool max) {

	1112 dst.ClearForWrite(vform);

	1113 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1114 int64_t src1_val = src1.Int(vform, i);

	1115 int64_t src2_val = src2.Int(vform, i);

	1116 int64_t dst_val;

	1117 if (max) {

	1118 dst_val = (src1_val > src2_val) ? src1_val : src2_val;

	1119 } else {

	1120 dst_val = (src1_val < src2_val) ? src1_val : src2_val;

	1121 }

	1122 dst.SetInt(vform, i, dst_val);

	1123 }

	1124 return dst;

	1125 }

	1126

	1127 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,

	1128 const LogicVRegister& src1,

	1129 const LogicVRegister& src2) {

	1130 return SMinMax(vform, dst, src1, src2, true);

	1131 }

	1132

	1133 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,

	1134 const LogicVRegister& src1,

	1135 const LogicVRegister& src2) {

	1136 return SMinMax(vform, dst, src1, src2, false);

	1137 }

	1138

	1139 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,

	1140 const LogicVRegister& src1,

	1141 const LogicVRegister& src2, bool max) {

	1142 int lanes = LaneCountFromFormat(vform);

	1143 int64_t result[kMaxLanesPerVector];

	1144 const LogicVRegister* src = &src1;

	1145 for (int j = 0; j < 2; j++) {

	1146 for (int i = 0; i < lanes; i += 2) {

	1147 int64_t first_val = src->Int(vform, i);

	1148 int64_t second_val = src->Int(vform, i + 1);

	1149 int64_t dst_val;

	1150 if (max) {

	1151 dst_val = (first_val > second_val) ? first_val : second_val;

	1152 } else {

	1153 dst_val = (first_val < second_val) ? first_val : second_val;

	1154 }

	1155 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);

	1156 result[(i >> 1) + (j * lanes / 2)] = dst_val;

	1157 }

	1158 src = &src2;

	1159 }

	1160 dst.SetIntArray(vform, result);

	1161 return dst;

	1162 }

	1163

	1164 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,

	1165 const LogicVRegister& src1,

	1166 const LogicVRegister& src2) {

	1167 return SMinMaxP(vform, dst, src1, src2, true);

	1168 }

	1169

	1170 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,

	1171 const LogicVRegister& src1,

	1172 const LogicVRegister& src2) {

	1173 return SMinMaxP(vform, dst, src1, src2, false);

	1174 }

	1175

	1176 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,

	1177 const LogicVRegister& src) {

	1178 DCHECK_EQ(vform, kFormatD);

	1179

	1180 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);

	1181 dst.ClearForWrite(vform);

	1182 dst.SetUint(vform, 0, dst_val);

	1183 return dst;

	1184 }

	1185

	1186 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,

	1187 const LogicVRegister& src) {

	1188 VectorFormat vform_dst =

	1189 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));

	1190

	1191 int64_t dst_val = 0;

	1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1193 dst_val += src.Int(vform, i);

	1194 }

	1195

	1196 dst.ClearForWrite(vform_dst);

	1197 dst.SetInt(vform_dst, 0, dst_val);

	1198 return dst;

	1199 }

	1200

	1201 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,

	1202 const LogicVRegister& src) {

	1203 VectorFormat vform_dst =

	1204 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

	1205

	1206 int64_t dst_val = 0;

	1207 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1208 dst_val += src.Int(vform, i);

	1209 }

	1210

	1211 dst.ClearForWrite(vform_dst);

	1212 dst.SetInt(vform_dst, 0, dst_val);

	1213 return dst;

	1214 }

	1215

	1216 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,

	1217 const LogicVRegister& src) {

	1218 VectorFormat vform_dst =

	1219 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);

	1220

	1221 uint64_t dst_val = 0;

	1222 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1223 dst_val += src.Uint(vform, i);

	1224 }

	1225

	1226 dst.ClearForWrite(vform_dst);

	1227 dst.SetUint(vform_dst, 0, dst_val);

	1228 return dst;

	1229 }

	1230

	1231 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,

	1232 const LogicVRegister& src, bool max) {

	1233 int64_t dst_val = max ? INT64_MIN : INT64_MAX;

	1234 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1235 int64_t src_val = src.Int(vform, i);

	1236 if (max) {

	1237 dst_val = (src_val > dst_val) ? src_val : dst_val;

	1238 } else {

	1239 dst_val = (src_val < dst_val) ? src_val : dst_val;

	1240 }

	1241 }

	1242 dst.ClearForWrite(ScalarFormatFromFormat(vform));

	1243 dst.SetInt(vform, 0, dst_val);

	1244 return dst;

	1245 }

	1246

	1247 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,

	1248 const LogicVRegister& src) {

	1249 SMinMaxV(vform, dst, src, true);

	1250 return dst;

	1251 }

	1252

	1253 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,

	1254 const LogicVRegister& src) {

	1255 SMinMaxV(vform, dst, src, false);

	1256 return dst;

	1257 }

	1258

	1259 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,

	1260 const LogicVRegister& src1,

	1261 const LogicVRegister& src2, bool max) {

	1262 dst.ClearForWrite(vform);

	1263 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1264 uint64_t src1_val = src1.Uint(vform, i);

	1265 uint64_t src2_val = src2.Uint(vform, i);

	1266 uint64_t dst_val;

	1267 if (max) {

	1268 dst_val = (src1_val > src2_val) ? src1_val : src2_val;

	1269 } else {

	1270 dst_val = (src1_val < src2_val) ? src1_val : src2_val;

	1271 }

	1272 dst.SetUint(vform, i, dst_val);

	1273 }

	1274 return dst;

	1275 }

	1276

	1277 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,

	1278 const LogicVRegister& src1,

	1279 const LogicVRegister& src2) {

	1280 return UMinMax(vform, dst, src1, src2, true);

	1281 }

	1282

	1283 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,

	1284 const LogicVRegister& src1,

	1285 const LogicVRegister& src2) {

	1286 return UMinMax(vform, dst, src1, src2, false);

	1287 }

	1288

	1289 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,

	1290 const LogicVRegister& src1,

	1291 const LogicVRegister& src2, bool max) {

	1292 int lanes = LaneCountFromFormat(vform);

	1293 uint64_t result[kMaxLanesPerVector];

	1294 const LogicVRegister* src = &src1;

	1295 for (int j = 0; j < 2; j++) {

	1296 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {

	1297 uint64_t first_val = src->Uint(vform, i);

	1298 uint64_t second_val = src->Uint(vform, i + 1);

	1299 uint64_t dst_val;

	1300 if (max) {

	1301 dst_val = (first_val > second_val) ? first_val : second_val;

	1302 } else {

	1303 dst_val = (first_val < second_val) ? first_val : second_val;

	1304 }

	1305 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);

	1306 result[(i >> 1) + (j * lanes / 2)] = dst_val;

	1307 }

	1308 src = &src2;

	1309 }

	1310 dst.SetUintArray(vform, result);

	1311 return dst;

	1312 }

	1313

	1314 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,

	1315 const LogicVRegister& src1,

	1316 const LogicVRegister& src2) {

	1317 return UMinMaxP(vform, dst, src1, src2, true);

	1318 }

	1319

	1320 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,

	1321 const LogicVRegister& src1,

	1322 const LogicVRegister& src2) {

	1323 return UMinMaxP(vform, dst, src1, src2, false);

	1324 }

	1325

	1326 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,

	1327 const LogicVRegister& src, bool max) {

	1328 uint64_t dst_val = max ? 0 : UINT64_MAX;

	1329 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1330 uint64_t src_val = src.Uint(vform, i);

	1331 if (max) {

	1332 dst_val = (src_val > dst_val) ? src_val : dst_val;

	1333 } else {

	1334 dst_val = (src_val < dst_val) ? src_val : dst_val;

	1335 }

	1336 }

	1337 dst.ClearForWrite(ScalarFormatFromFormat(vform));

	1338 dst.SetUint(vform, 0, dst_val);

	1339 return dst;

	1340 }

	1341

	1342 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,

	1343 const LogicVRegister& src) {

	1344 UMinMaxV(vform, dst, src, true);

	1345 return dst;

	1346 }

	1347

	1348 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,

	1349 const LogicVRegister& src) {

	1350 UMinMaxV(vform, dst, src, false);

	1351 return dst;

	1352 }

	1353

	1354 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,

	1355 const LogicVRegister& src, int shift) {

	1356 DCHECK_GE(shift, 0);

	1357 SimVRegister temp;

	1358 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1359 return ushl(vform, dst, src, shiftreg);

	1360 }

	1361

	1362 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,

	1363 const LogicVRegister& src, int shift) {

	1364 DCHECK_GE(shift, 0);

	1365 SimVRegister temp1, temp2;

	1366 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1367 LogicVRegister extendedreg = sxtl(vform, temp2, src);

	1368 return sshl(vform, dst, extendedreg, shiftreg);

	1369 }

	1370

	1371 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,

	1372 const LogicVRegister& src, int shift) {

	1373 DCHECK_GE(shift, 0);

	1374 SimVRegister temp1, temp2;

	1375 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1376 LogicVRegister extendedreg = sxtl2(vform, temp2, src);

	1377 return sshl(vform, dst, extendedreg, shiftreg);

	1378 }

	1379

	1380 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,

	1381 const LogicVRegister& src) {

	1382 int shift = LaneSizeInBitsFromFormat(vform) / 2;

	1383 return sshll(vform, dst, src, shift);

	1384 }

	1385

	1386 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,

	1387 const LogicVRegister& src) {

	1388 int shift = LaneSizeInBitsFromFormat(vform) / 2;

	1389 return sshll2(vform, dst, src, shift);

	1390 }

	1391

	1392 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,

	1393 const LogicVRegister& src, int shift) {

	1394 DCHECK_GE(shift, 0);

	1395 SimVRegister temp1, temp2;

	1396 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1397 LogicVRegister extendedreg = uxtl(vform, temp2, src);

	1398 return ushl(vform, dst, extendedreg, shiftreg);

	1399 }

	1400

	1401 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,

	1402 const LogicVRegister& src, int shift) {

	1403 DCHECK_GE(shift, 0);

	1404 SimVRegister temp1, temp2;

	1405 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);

	1406 LogicVRegister extendedreg = uxtl2(vform, temp2, src);

	1407 return ushl(vform, dst, extendedreg, shiftreg);

	1408 }

	1409

	1410 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,

	1411 const LogicVRegister& src, int shift) {

	1412 dst.ClearForWrite(vform);

	1413 int laneCount = LaneCountFromFormat(vform);

	1414 for (int i = 0; i < laneCount; i++) {

	1415 uint64_t src_lane = src.Uint(vform, i);

	1416 uint64_t dst_lane = dst.Uint(vform, i);

	1417 uint64_t shifted = src_lane << shift;

	1418 uint64_t mask = MaxUintFromFormat(vform) << shift;

	1419 dst.SetUint(vform, i, (dst_lane & ~mask) \| shifted);

	1420 }

	1421 return dst;

	1422 }

	1423

	1424 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,

	1425 const LogicVRegister& src, int shift) {

	1426 DCHECK_GE(shift, 0);

	1427 SimVRegister temp;

	1428 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1429 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);

	1430 }

	1431

	1432 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,

	1433 const LogicVRegister& src, int shift) {

	1434 DCHECK_GE(shift, 0);

	1435 SimVRegister temp;

	1436 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1437 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);

	1438 }

	1439

	1440 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,

	1441 const LogicVRegister& src, int shift) {

	1442 DCHECK_GE(shift, 0);

	1443 SimVRegister temp;

	1444 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);

	1445 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);

	1446 }

	1447

	1448 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,

	1449 const LogicVRegister& src, int shift) {

	1450 dst.ClearForWrite(vform);

	1451 int laneCount = LaneCountFromFormat(vform);

	1452 DCHECK((shift > 0) &&

	1453 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));

	1454 for (int i = 0; i < laneCount; i++) {

	1455 uint64_t src_lane = src.Uint(vform, i);

	1456 uint64_t dst_lane = dst.Uint(vform, i);

	1457 uint64_t shifted;

	1458 uint64_t mask;

	1459 if (shift == 64) {

	1460 shifted = 0;

	1461 mask = 0;

	1462 } else {

	1463 shifted = src_lane >> shift;

	1464 mask = MaxUintFromFormat(vform) >> shift;

	1465 }

	1466 dst.SetUint(vform, i, (dst_lane & ~mask) \| shifted);

	1467 }

	1468 return dst;

	1469 }

	1470

	1471 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,

	1472 const LogicVRegister& src, int shift) {

	1473 DCHECK_GE(shift, 0);

	1474 SimVRegister temp;

	1475 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);

	1476 return ushl(vform, dst, src, shiftreg);

	1477 }

	1478

	1479 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,

	1480 const LogicVRegister& src, int shift) {

	1481 DCHECK_GE(shift, 0);

	1482 SimVRegister temp;

	1483 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);

	1484 return sshl(vform, dst, src, shiftreg);

	1485 }

	1486

	1487 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,

	1488 const LogicVRegister& src, int shift) {

	1489 SimVRegister temp;

	1490 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);

	1491 return add(vform, dst, dst, shifted_reg);

	1492 }

	1493

	1494 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,

	1495 const LogicVRegister& src, int shift) {

	1496 SimVRegister temp;

	1497 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);

	1498 return add(vform, dst, dst, shifted_reg);

	1499 }

	1500

	1501 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,

	1502 const LogicVRegister& src, int shift) {

	1503 SimVRegister temp;

	1504 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);

	1505 return add(vform, dst, dst, shifted_reg);

	1506 }

	1507

	1508 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,

	1509 const LogicVRegister& src, int shift) {

	1510 SimVRegister temp;

	1511 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);

	1512 return add(vform, dst, dst, shifted_reg);

	1513 }

	1514

	1515 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,

	1516 const LogicVRegister& src) {

	1517 uint64_t result[16];

	1518 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1519 int laneCount = LaneCountFromFormat(vform);

	1520 for (int i = 0; i < laneCount; i++) {

	1521 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);

	1522 }

	1523

	1524 dst.SetUintArray(vform, result);

	1525 return dst;

	1526 }

	1527

	1528 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,

	1529 const LogicVRegister& src) {

	1530 uint64_t result[16];

	1531 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1532 int laneCount = LaneCountFromFormat(vform);

	1533 for (int i = 0; i < laneCount; i++) {

	1534 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);

	1535 }

	1536

	1537 dst.SetUintArray(vform, result);

	1538 return dst;

	1539 }

	1540

	1541 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,

	1542 const LogicVRegister& src) {

	1543 uint64_t result[16];

	1544 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1545 int laneCount = LaneCountFromFormat(vform);

	1546 for (int i = 0; i < laneCount; i++) {

	1547 uint64_t value = src.Uint(vform, i);

	1548 result[i] = 0;

	1549 for (int j = 0; j < laneSizeInBits; j++) {

	1550 result[i] += (value & 1);

	1551 value >>= 1;

	1552 }

	1553 }

	1554

	1555 dst.SetUintArray(vform, result);

	1556 return dst;

	1557 }

	1558

	1559 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,

	1560 const LogicVRegister& src1,

	1561 const LogicVRegister& src2) {

	1562 dst.ClearForWrite(vform);

	1563 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1564 int8_t shift_val = src2.Int(vform, i);

	1565 int64_t lj_src_val = src1.IntLeftJustified(vform, i);

	1566

	1567 // Set signed saturation state.

	1568 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&

	1569 (lj_src_val != 0)) {

	1570 dst.SetSignedSat(i, lj_src_val >= 0);

	1571 }

	1572

	1573 // Set unsigned saturation state.

	1574 if (lj_src_val < 0) {

	1575 dst.SetUnsignedSat(i, false);

	1576 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&

	1577 (lj_src_val != 0)) {

	1578 dst.SetUnsignedSat(i, true);

	1579 }

	1580

	1581 int64_t src_val = src1.Int(vform, i);

	1582 bool src_is_negative = src_val < 0;

	1583 if (shift_val > 63) {

	1584 dst.SetInt(vform, i, 0);

	1585 } else if (shift_val < -63) {

	1586 dst.SetRounding(i, src_is_negative);

	1587 dst.SetInt(vform, i, src_is_negative ? -1 : 0);

	1588 } else {

	1589 // Use unsigned types for shifts, as behaviour is undefined for signed

	1590 // lhs.

	1591 uint64_t usrc_val = static_cast<uint64_t>(src_val);

	1592

	1593 if (shift_val < 0) {

	1594 // Convert to right shift.

	1595 shift_val = -shift_val;

	1596

	1597 // Set rounding state by testing most-significant bit shifted out.

	1598 // Rounding only needed on right shifts.

	1599 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {

	1600 dst.SetRounding(i, true);

	1601 }

	1602

	1603 usrc_val >>= shift_val;

	1604

	1605 if (src_is_negative) {

	1606 // Simulate sign-extension.

	1607 usrc_val \|= (~UINT64_C(0) << (64 - shift_val));

	1608 }

	1609 } else {

	1610 usrc_val <<= shift_val;

	1611 }

	1612 dst.SetUint(vform, i, usrc_val);

	1613 }

	1614 }

	1615 return dst;

	1616 }

	1617

	1618 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,

	1619 const LogicVRegister& src1,

	1620 const LogicVRegister& src2) {

	1621 dst.ClearForWrite(vform);

	1622 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1623 int8_t shift_val = src2.Int(vform, i);

	1624 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);

	1625

	1626 // Set saturation state.

	1627 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {

	1628 dst.SetUnsignedSat(i, true);

	1629 }

	1630

	1631 uint64_t src_val = src1.Uint(vform, i);

	1632 if ((shift_val > 63) \|\| (shift_val < -64)) {

	1633 dst.SetUint(vform, i, 0);

	1634 } else {

	1635 if (shift_val < 0) {

	1636 // Set rounding state. Rounding only needed on right shifts.

	1637 if (((src_val >> (-shift_val - 1)) & 1) == 1) {

	1638 dst.SetRounding(i, true);

	1639 }

	1640

	1641 if (shift_val == -64) {

	1642 src_val = 0;

	1643 } else {

	1644 src_val >>= -shift_val;

	1645 }

	1646 } else {

	1647 src_val <<= shift_val;

	1648 }

	1649 dst.SetUint(vform, i, src_val);

	1650 }

	1651 }

	1652 return dst;

	1653 }

	1654

	1655 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,

	1656 const LogicVRegister& src) {

	1657 dst.ClearForWrite(vform);

	1658 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1659 // Test for signed saturation.

	1660 int64_t sa = src.Int(vform, i);

	1661 if (sa == MinIntFromFormat(vform)) {

	1662 dst.SetSignedSat(i, true);

	1663 }

	1664 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);

	1665 }

	1666 return dst;

	1667 }

	1668

	1669 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,

	1670 const LogicVRegister& src) {

	1671 dst.ClearForWrite(vform);

	1672 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1673 int64_t sa = dst.IntLeftJustified(vform, i);

	1674 uint64_t ub = src.UintLeftJustified(vform, i);

	1675 uint64_t ur = sa + ub;

	1676

	1677 int64_t sr = bit_cast<int64_t>(ur);

	1678 if (sr < sa) { // Test for signed positive saturation.

	1679 dst.SetInt(vform, i, MaxIntFromFormat(vform));

	1680 } else {

	1681 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));

	1682 }

	1683 }

	1684 return dst;

	1685 }

	1686

	1687 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,

	1688 const LogicVRegister& src) {

	1689 dst.ClearForWrite(vform);

	1690 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1691 uint64_t ua = dst.UintLeftJustified(vform, i);

	1692 int64_t sb = src.IntLeftJustified(vform, i);

	1693 uint64_t ur = ua + sb;

	1694

	1695 if ((sb > 0) && (ur <= ua)) {

	1696 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.

	1697 } else if ((sb < 0) && (ur >= ua)) {

	1698 dst.SetUint(vform, i, 0); // Negative saturation.

	1699 } else {

	1700 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));

	1701 }

	1702 }

	1703 return dst;

	1704 }

	1705

	1706 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,

	1707 const LogicVRegister& src) {

	1708 dst.ClearForWrite(vform);

	1709 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1710 // Test for signed saturation.

	1711 int64_t sa = src.Int(vform, i);

	1712 if (sa == MinIntFromFormat(vform)) {

	1713 dst.SetSignedSat(i, true);

	1714 }

	1715 if (sa < 0) {

	1716 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);

	1717 } else {

	1718 dst.SetInt(vform, i, sa);

	1719 }

	1720 }

	1721 return dst;

	1722 }

	1723

	1724 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,

	1725 LogicVRegister dst, bool dstIsSigned,

	1726 const LogicVRegister& src,

	1727 bool srcIsSigned) {

	1728 bool upperhalf = false;

	1729 VectorFormat srcform = kFormatUndefined;

	1730 int64_t ssrc[8];

	1731 uint64_t usrc[8];

	1732

	1733 switch (dstform) {

	1734 case kFormat8B:

	1735 upperhalf = false;

	1736 srcform = kFormat8H;

	1737 break;

	1738 case kFormat16B:

	1739 upperhalf = true;

	1740 srcform = kFormat8H;

	1741 break;

	1742 case kFormat4H:

	1743 upperhalf = false;

	1744 srcform = kFormat4S;

	1745 break;

	1746 case kFormat8H:

	1747 upperhalf = true;

	1748 srcform = kFormat4S;

	1749 break;

	1750 case kFormat2S:

	1751 upperhalf = false;

	1752 srcform = kFormat2D;

	1753 break;

	1754 case kFormat4S:

	1755 upperhalf = true;

	1756 srcform = kFormat2D;

	1757 break;

	1758 case kFormatB:

	1759 upperhalf = false;

	1760 srcform = kFormatH;

	1761 break;

	1762 case kFormatH:

	1763 upperhalf = false;

	1764 srcform = kFormatS;

	1765 break;

	1766 case kFormatS:

	1767 upperhalf = false;

	1768 srcform = kFormatD;

	1769 break;

	1770 default:

	1771 UNIMPLEMENTED();

	1772 }

	1773

	1774 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {

	1775 ssrc[i] = src.Int(srcform, i);

	1776 usrc[i] = src.Uint(srcform, i);

	1777 }

	1778

	1779 int offset;

	1780 if (upperhalf) {

	1781 offset = LaneCountFromFormat(dstform) / 2;

	1782 } else {

	1783 offset = 0;

	1784 dst.ClearForWrite(dstform);

	1785 }

	1786

	1787 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {

	1788 // Test for signed saturation

	1789 if (ssrc[i] > MaxIntFromFormat(dstform)) {

	1790 dst.SetSignedSat(offset + i, true);

	1791 } else if (ssrc[i] < MinIntFromFormat(dstform)) {

	1792 dst.SetSignedSat(offset + i, false);

	1793 }

	1794

	1795 // Test for unsigned saturation

	1796 if (srcIsSigned) {

	1797 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {

	1798 dst.SetUnsignedSat(offset + i, true);

	1799 } else if (ssrc[i] < 0) {

	1800 dst.SetUnsignedSat(offset + i, false);

	1801 }

	1802 } else {

	1803 if (usrc[i] > MaxUintFromFormat(dstform)) {

	1804 dst.SetUnsignedSat(offset + i, true);

	1805 }

	1806 }

	1807

	1808 int64_t result;

	1809 if (srcIsSigned) {

	1810 result = ssrc[i] & MaxUintFromFormat(dstform);

	1811 } else {

	1812 result = usrc[i] & MaxUintFromFormat(dstform);

	1813 }

	1814

	1815 if (dstIsSigned) {

	1816 dst.SetInt(dstform, offset + i, result);

	1817 } else {

	1818 dst.SetUint(dstform, offset + i, result);

	1819 }

	1820 }

	1821 return dst;

	1822 }

	1823

	1824 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,

	1825 const LogicVRegister& src) {

	1826 return ExtractNarrow(vform, dst, true, src, true);

	1827 }

	1828

	1829 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,

	1830 const LogicVRegister& src) {

	1831 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);

	1832 }

	1833

	1834 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,

	1835 const LogicVRegister& src) {

	1836 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);

	1837 }

	1838

	1839 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,

	1840 const LogicVRegister& src) {

	1841 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);

	1842 }

	1843

	1844 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,

	1845 const LogicVRegister& src1,

	1846 const LogicVRegister& src2, bool issigned) {

	1847 dst.ClearForWrite(vform);

	1848 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1849 if (issigned) {

	1850 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);

	1851 sr = sr > 0 ? sr : -sr;

	1852 dst.SetInt(vform, i, sr);

	1853 } else {

	1854 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);

	1855 sr = sr > 0 ? sr : -sr;

	1856 dst.SetUint(vform, i, sr);

	1857 }

	1858 }

	1859 return dst;

	1860 }

	1861

	1862 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,

	1863 const LogicVRegister& src1,

	1864 const LogicVRegister& src2) {

	1865 SimVRegister temp;

	1866 dst.ClearForWrite(vform);

	1867 AbsDiff(vform, temp, src1, src2, true);

	1868 add(vform, dst, dst, temp);

	1869 return dst;

	1870 }

	1871

	1872 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,

	1873 const LogicVRegister& src1,

	1874 const LogicVRegister& src2) {

	1875 SimVRegister temp;

	1876 dst.ClearForWrite(vform);

	1877 AbsDiff(vform, temp, src1, src2, false);

	1878 add(vform, dst, dst, temp);

	1879 return dst;

	1880 }

	1881

	1882 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,

	1883 const LogicVRegister& src) {

	1884 dst.ClearForWrite(vform);

	1885 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	1886 dst.SetUint(vform, i, ~src.Uint(vform, i));

	1887 }

	1888 return dst;

	1889 }

	1890

	1891 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,

	1892 const LogicVRegister& src) {

	1893 uint64_t result[16];

	1894 int laneCount = LaneCountFromFormat(vform);

	1895 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);

	1896 uint64_t reversed_value;

	1897 uint64_t value;

	1898 for (int i = 0; i < laneCount; i++) {

	1899 value = src.Uint(vform, i);

	1900 reversed_value = 0;

	1901 for (int j = 0; j < laneSizeInBits; j++) {

	1902 reversed_value = (reversed_value << 1) \| (value & 1);

	1903 value >>= 1;

	1904 }

	1905 result[i] = reversed_value;

	1906 }

	1907

	1908 dst.SetUintArray(vform, result);

	1909 return dst;

	1910 }

	1911

	1912 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,

	1913 const LogicVRegister& src, int revSize) {

	1914 uint64_t result[16];

	1915 int laneCount = LaneCountFromFormat(vform);

	1916 int laneSize = LaneSizeInBytesFromFormat(vform);

	1917 int lanesPerLoop = revSize / laneSize;

	1918 for (int i = 0; i < laneCount; i += lanesPerLoop) {

	1919 for (int j = 0; j < lanesPerLoop; j++) {

	1920 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);

	1921 }

	1922 }

	1923 dst.SetUintArray(vform, result);

	1924 return dst;

	1925 }

	1926

	1927 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,

	1928 const LogicVRegister& src) {

	1929 return rev(vform, dst, src, 2);

	1930 }

	1931

	1932 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,

	1933 const LogicVRegister& src) {

	1934 return rev(vform, dst, src, 4);

	1935 }

	1936

	1937 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,

	1938 const LogicVRegister& src) {

	1939 return rev(vform, dst, src, 8);

	1940 }

	1941

	1942 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,

	1943 const LogicVRegister& src, bool is_signed,

	1944 bool do_accumulate) {

	1945 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);

	1946 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);

	1947 DCHECK_LE(LaneCountFromFormat(vform), 8);

	1948

	1949 uint64_t result[8];

	1950 int lane_count = LaneCountFromFormat(vform);

	1951 for (int i = 0; i < lane_count; i++) {

	1952 if (is_signed) {

	1953 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +

	1954 src.Int(vformsrc, 2 * i + 1));

	1955 } else {

	1956 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);

	1957 }

	1958 }

	1959

	1960 dst.ClearForWrite(vform);

	1961 for (int i = 0; i < lane_count; ++i) {

	1962 if (do_accumulate) {

	1963 result[i] += dst.Uint(vform, i);

	1964 }

	1965 dst.SetUint(vform, i, result[i]);

	1966 }

	1967

	1968 return dst;

	1969 }

	1970

	1971 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,

	1972 const LogicVRegister& src) {

	1973 return addlp(vform, dst, src, true, false);

	1974 }

	1975

	1976 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,

	1977 const LogicVRegister& src) {

	1978 return addlp(vform, dst, src, false, false);

	1979 }

	1980

	1981 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,

	1982 const LogicVRegister& src) {

	1983 return addlp(vform, dst, src, true, true);

	1984 }

	1985

	1986 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,

	1987 const LogicVRegister& src) {

	1988 return addlp(vform, dst, src, false, true);

	1989 }

	1990

	1991 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,

	1992 const LogicVRegister& src1,

	1993 const LogicVRegister& src2, int index) {

	1994 uint8_t result[16];

	1995 int laneCount = LaneCountFromFormat(vform);

	1996 for (int i = 0; i < laneCount - index; ++i) {

	1997 result[i] = src1.Uint(vform, i + index);

	1998 }

	1999 for (int i = 0; i < index; ++i) {

	2000 result[laneCount - index + i] = src2.Uint(vform, i);

	2001 }

	2002 dst.ClearForWrite(vform);

	2003 for (int i = 0; i < laneCount; ++i) {

	2004 dst.SetUint(vform, i, result[i]);

	2005 }

	2006 return dst;

	2007 }

	2008

	2009 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,

	2010 const LogicVRegister& src,

	2011 int src_index) {

	2012 int laneCount = LaneCountFromFormat(vform);

	2013 uint64_t value = src.Uint(vform, src_index);

	2014 dst.ClearForWrite(vform);

	2015 for (int i = 0; i < laneCount; ++i) {

	2016 dst.SetUint(vform, i, value);

	2017 }

	2018 return dst;

	2019 }

	2020

	2021 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,

	2022 uint64_t imm) {

	2023 int laneCount = LaneCountFromFormat(vform);

	2024 uint64_t value = imm & MaxUintFromFormat(vform);

	2025 dst.ClearForWrite(vform);

	2026 for (int i = 0; i < laneCount; ++i) {

	2027 dst.SetUint(vform, i, value);

	2028 }

	2029 return dst;

	2030 }

	2031

	2032 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,

	2033 int dst_index, const LogicVRegister& src,

	2034 int src_index) {

	2035 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));

	2036 return dst;

	2037 }

	2038

	2039 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,

	2040 int dst_index, uint64_t imm) {

	2041 uint64_t value = imm & MaxUintFromFormat(vform);

	2042 dst.SetUint(vform, dst_index, value);

	2043 return dst;

	2044 }

	2045

	2046 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,

	2047 uint64_t imm) {

	2048 int laneCount = LaneCountFromFormat(vform);

	2049 dst.ClearForWrite(vform);

	2050 for (int i = 0; i < laneCount; ++i) {

	2051 dst.SetUint(vform, i, imm);

	2052 }

	2053 return dst;

	2054 }

	2055

	2056 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,

	2057 uint64_t imm) {

	2058 int laneCount = LaneCountFromFormat(vform);

	2059 dst.ClearForWrite(vform);

	2060 for (int i = 0; i < laneCount; ++i) {

	2061 dst.SetUint(vform, i, ~imm);

	2062 }

	2063 return dst;

	2064 }

	2065

	2066 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,

	2067 const LogicVRegister& src, uint64_t imm) {

	2068 uint64_t result[16];

	2069 int laneCount = LaneCountFromFormat(vform);

	2070 for (int i = 0; i < laneCount; ++i) {

	2071 result[i] = src.Uint(vform, i) \| imm;

	2072 }

	2073 dst.SetUintArray(vform, result);

	2074 return dst;

	2075 }

	2076

	2077 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,

	2078 const LogicVRegister& src) {

	2079 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2080

	2081 dst.ClearForWrite(vform);

	2082 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2083 dst.SetUint(vform, i, src.Uint(vform_half, i));

	2084 }

	2085 return dst;

	2086 }

	2087

	2088 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,

	2089 const LogicVRegister& src) {

	2090 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2091

	2092 dst.ClearForWrite(vform);

	2093 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2094 dst.SetInt(vform, i, src.Int(vform_half, i));

	2095 }

	2096 return dst;

	2097 }

	2098

	2099 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,

	2100 const LogicVRegister& src) {

	2101 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2102 int lane_count = LaneCountFromFormat(vform);

	2103

	2104 dst.ClearForWrite(vform);

	2105 for (int i = 0; i < lane_count; i++) {

	2106 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));

	2107 }

	2108 return dst;

	2109 }

	2110

	2111 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,

	2112 const LogicVRegister& src) {

	2113 VectorFormat vform_half = VectorFormatHalfWidth(vform);

	2114 int lane_count = LaneCountFromFormat(vform);

	2115

	2116 dst.ClearForWrite(vform);

	2117 for (int i = 0; i < lane_count; i++) {

	2118 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));

	2119 }

	2120 return dst;

	2121 }

	2122

	2123 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,

	2124 const LogicVRegister& src, int shift) {

	2125 SimVRegister temp;

	2126 VectorFormat vform_src = VectorFormatDoubleWidth(vform);

	2127 VectorFormat vform_dst = vform;

	2128 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);

	2129 return ExtractNarrow(vform_dst, dst, false, shifted_src, false);

	2130 }

	2131

	2132 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,

	2133 const LogicVRegister& src, int shift) {

	2134 SimVRegister temp;

	2135 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2136 VectorFormat vformdst = vform;

	2137 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);

	2138 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2139 }

	2140

	2141 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,

	2142 const LogicVRegister& src, int shift) {

	2143 SimVRegister temp;

	2144 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2145 VectorFormat vformdst = vform;

	2146 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);

	2147 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2148 }

	2149

	2150 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,

	2151 const LogicVRegister& src, int shift) {

	2152 SimVRegister temp;

	2153 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2154 VectorFormat vformdst = vform;

	2155 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);

	2156 return ExtractNarrow(vformdst, dst, false, shifted_src, false);

	2157 }

	2158

	2159 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,

	2160 const LogicVRegister& ind,

	2161 bool zero_out_of_bounds,

	2162 const LogicVRegister* tab1,

	2163 const LogicVRegister* tab2,

	2164 const LogicVRegister* tab3,

	2165 const LogicVRegister* tab4) {

	2166 DCHECK_NOT_NULL(tab1);

	2167 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};

	2168 uint64_t result[kMaxLanesPerVector];

	2169 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2170 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);

	2171 }

	2172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2173 uint64_t j = ind.Uint(vform, i);

	2174 int tab_idx = static_cast<int>(j >> 4);

	2175 int j_idx = static_cast<int>(j & 15);

	2176 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {

	2177 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);

	2178 }

	2179 }

	2180 dst.SetUintArray(vform, result);

	2181 return dst;

	2182 }

	2183

	2184 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2185 const LogicVRegister& tab,

	2186 const LogicVRegister& ind) {

	2187 return Table(vform, dst, ind, true, &tab);

	2188 }

	2189

	2190 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2191 const LogicVRegister& tab,

	2192 const LogicVRegister& tab2,

	2193 const LogicVRegister& ind) {

	2194 return Table(vform, dst, ind, true, &tab, &tab2);

	2195 }

	2196

	2197 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2198 const LogicVRegister& tab,

	2199 const LogicVRegister& tab2,

	2200 const LogicVRegister& tab3,

	2201 const LogicVRegister& ind) {

	2202 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);

	2203 }

	2204

	2205 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,

	2206 const LogicVRegister& tab,

	2207 const LogicVRegister& tab2,

	2208 const LogicVRegister& tab3,

	2209 const LogicVRegister& tab4,

	2210 const LogicVRegister& ind) {

	2211 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);

	2212 }

	2213

	2214 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2215 const LogicVRegister& tab,

	2216 const LogicVRegister& ind) {

	2217 return Table(vform, dst, ind, false, &tab);

	2218 }

	2219

	2220 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2221 const LogicVRegister& tab,

	2222 const LogicVRegister& tab2,

	2223 const LogicVRegister& ind) {

	2224 return Table(vform, dst, ind, false, &tab, &tab2);

	2225 }

	2226

	2227 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2228 const LogicVRegister& tab,

	2229 const LogicVRegister& tab2,

	2230 const LogicVRegister& tab3,

	2231 const LogicVRegister& ind) {

	2232 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);

	2233 }

	2234

	2235 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,

	2236 const LogicVRegister& tab,

	2237 const LogicVRegister& tab2,

	2238 const LogicVRegister& tab3,

	2239 const LogicVRegister& tab4,

	2240 const LogicVRegister& ind) {

	2241 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);

	2242 }

	2243

	2244 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,

	2245 const LogicVRegister& src, int shift) {

	2246 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);

	2247 }

	2248

	2249 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,

	2250 const LogicVRegister& src, int shift) {

	2251 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);

	2252 }

	2253

	2254 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,

	2255 const LogicVRegister& src, int shift) {

	2256 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);

	2257 }

	2258

	2259 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,

	2260 const LogicVRegister& src, int shift) {

	2261 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);

	2262 }

	2263

	2264 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,

	2265 const LogicVRegister& src, int shift) {

	2266 SimVRegister temp;

	2267 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2268 VectorFormat vformdst = vform;

	2269 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2270 return sqxtn(vformdst, dst, shifted_src);

	2271 }

	2272

	2273 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,

	2274 const LogicVRegister& src, int shift) {

	2275 SimVRegister temp;

	2276 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2277 VectorFormat vformdst = vform;

	2278 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2279 return sqxtn(vformdst, dst, shifted_src);

	2280 }

	2281

	2282 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,

	2283 const LogicVRegister& src, int shift) {

	2284 SimVRegister temp;

	2285 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2286 VectorFormat vformdst = vform;

	2287 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2288 return sqxtn(vformdst, dst, shifted_src);

	2289 }

	2290

	2291 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,

	2292 const LogicVRegister& src, int shift) {

	2293 SimVRegister temp;

	2294 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2295 VectorFormat vformdst = vform;

	2296 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2297 return sqxtn(vformdst, dst, shifted_src);

	2298 }

	2299

	2300 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,

	2301 const LogicVRegister& src, int shift) {

	2302 SimVRegister temp;

	2303 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2304 VectorFormat vformdst = vform;

	2305 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2306 return sqxtun(vformdst, dst, shifted_src);

	2307 }

	2308

	2309 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,

	2310 const LogicVRegister& src, int shift) {

	2311 SimVRegister temp;

	2312 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2313 VectorFormat vformdst = vform;

	2314 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);

	2315 return sqxtun(vformdst, dst, shifted_src);

	2316 }

	2317

	2318 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,

	2319 const LogicVRegister& src, int shift) {

	2320 SimVRegister temp;

	2321 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);

	2322 VectorFormat vformdst = vform;

	2323 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2324 return sqxtun(vformdst, dst, shifted_src);

	2325 }

	2326

	2327 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,

	2328 const LogicVRegister& src, int shift) {

	2329 SimVRegister temp;

	2330 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));

	2331 VectorFormat vformdst = vform;

	2332 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);

	2333 return sqxtun(vformdst, dst, shifted_src);

	2334 }

	2335

	2336 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,

	2337 const LogicVRegister& src1,

	2338 const LogicVRegister& src2) {

	2339 SimVRegister temp1, temp2;

	2340 uxtl(vform, temp1, src1);

	2341 uxtl(vform, temp2, src2);

	2342 add(vform, dst, temp1, temp2);

	2343 return dst;

	2344 }

	2345

	2346 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,

	2347 const LogicVRegister& src1,

	2348 const LogicVRegister& src2) {

	2349 SimVRegister temp1, temp2;

	2350 uxtl2(vform, temp1, src1);

	2351 uxtl2(vform, temp2, src2);

	2352 add(vform, dst, temp1, temp2);

	2353 return dst;

	2354 }

	2355

	2356 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,

	2357 const LogicVRegister& src1,

	2358 const LogicVRegister& src2) {

	2359 SimVRegister temp;

	2360 uxtl(vform, temp, src2);

	2361 add(vform, dst, src1, temp);

	2362 return dst;

	2363 }

	2364

	2365 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,

	2366 const LogicVRegister& src1,

	2367 const LogicVRegister& src2) {

	2368 SimVRegister temp;

	2369 uxtl2(vform, temp, src2);

	2370 add(vform, dst, src1, temp);

	2371 return dst;

	2372 }

	2373

	2374 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,

	2375 const LogicVRegister& src1,

	2376 const LogicVRegister& src2) {

	2377 SimVRegister temp1, temp2;

	2378 sxtl(vform, temp1, src1);

	2379 sxtl(vform, temp2, src2);

	2380 add(vform, dst, temp1, temp2);

	2381 return dst;

	2382 }

	2383

	2384 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,

	2385 const LogicVRegister& src1,

	2386 const LogicVRegister& src2) {

	2387 SimVRegister temp1, temp2;

	2388 sxtl2(vform, temp1, src1);

	2389 sxtl2(vform, temp2, src2);

	2390 add(vform, dst, temp1, temp2);

	2391 return dst;

	2392 }

	2393

	2394 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,

	2395 const LogicVRegister& src1,

	2396 const LogicVRegister& src2) {

	2397 SimVRegister temp;

	2398 sxtl(vform, temp, src2);

	2399 add(vform, dst, src1, temp);

	2400 return dst;

	2401 }

	2402

	2403 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,

	2404 const LogicVRegister& src1,

	2405 const LogicVRegister& src2) {

	2406 SimVRegister temp;

	2407 sxtl2(vform, temp, src2);

	2408 add(vform, dst, src1, temp);

	2409 return dst;

	2410 }

	2411

	2412 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,

	2413 const LogicVRegister& src1,

	2414 const LogicVRegister& src2) {

	2415 SimVRegister temp1, temp2;

	2416 uxtl(vform, temp1, src1);

	2417 uxtl(vform, temp2, src2);

	2418 sub(vform, dst, temp1, temp2);

	2419 return dst;

	2420 }

	2421

	2422 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,

	2423 const LogicVRegister& src1,

	2424 const LogicVRegister& src2) {

	2425 SimVRegister temp1, temp2;

	2426 uxtl2(vform, temp1, src1);

	2427 uxtl2(vform, temp2, src2);

	2428 sub(vform, dst, temp1, temp2);

	2429 return dst;

	2430 }

	2431

	2432 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,

	2433 const LogicVRegister& src1,

	2434 const LogicVRegister& src2) {

	2435 SimVRegister temp;

	2436 uxtl(vform, temp, src2);

	2437 sub(vform, dst, src1, temp);

	2438 return dst;

	2439 }

	2440

	2441 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,

	2442 const LogicVRegister& src1,

	2443 const LogicVRegister& src2) {

	2444 SimVRegister temp;

	2445 uxtl2(vform, temp, src2);

	2446 sub(vform, dst, src1, temp);

	2447 return dst;

	2448 }

	2449

	2450 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,

	2451 const LogicVRegister& src1,

	2452 const LogicVRegister& src2) {

	2453 SimVRegister temp1, temp2;

	2454 sxtl(vform, temp1, src1);

	2455 sxtl(vform, temp2, src2);

	2456 sub(vform, dst, temp1, temp2);

	2457 return dst;

	2458 }

	2459

	2460 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,

	2461 const LogicVRegister& src1,

	2462 const LogicVRegister& src2) {

	2463 SimVRegister temp1, temp2;

	2464 sxtl2(vform, temp1, src1);

	2465 sxtl2(vform, temp2, src2);

	2466 sub(vform, dst, temp1, temp2);

	2467 return dst;

	2468 }

	2469

	2470 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,

	2471 const LogicVRegister& src1,

	2472 const LogicVRegister& src2) {

	2473 SimVRegister temp;

	2474 sxtl(vform, temp, src2);

	2475 sub(vform, dst, src1, temp);

	2476 return dst;

	2477 }

	2478

	2479 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,

	2480 const LogicVRegister& src1,

	2481 const LogicVRegister& src2) {

	2482 SimVRegister temp;

	2483 sxtl2(vform, temp, src2);

	2484 sub(vform, dst, src1, temp);

	2485 return dst;

	2486 }

	2487

	2488 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,

	2489 const LogicVRegister& src1,

	2490 const LogicVRegister& src2) {

	2491 SimVRegister temp1, temp2;

	2492 uxtl(vform, temp1, src1);

	2493 uxtl(vform, temp2, src2);

	2494 uaba(vform, dst, temp1, temp2);

	2495 return dst;

	2496 }

	2497

	2498 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,

	2499 const LogicVRegister& src1,

	2500 const LogicVRegister& src2) {

	2501 SimVRegister temp1, temp2;

	2502 uxtl2(vform, temp1, src1);

	2503 uxtl2(vform, temp2, src2);

	2504 uaba(vform, dst, temp1, temp2);

	2505 return dst;

	2506 }

	2507

	2508 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,

	2509 const LogicVRegister& src1,

	2510 const LogicVRegister& src2) {

	2511 SimVRegister temp1, temp2;

	2512 sxtl(vform, temp1, src1);

	2513 sxtl(vform, temp2, src2);

	2514 saba(vform, dst, temp1, temp2);

	2515 return dst;

	2516 }

	2517

	2518 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,

	2519 const LogicVRegister& src1,

	2520 const LogicVRegister& src2) {

	2521 SimVRegister temp1, temp2;

	2522 sxtl2(vform, temp1, src1);

	2523 sxtl2(vform, temp2, src2);

	2524 saba(vform, dst, temp1, temp2);

	2525 return dst;

	2526 }

	2527

	2528 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,

	2529 const LogicVRegister& src1,

	2530 const LogicVRegister& src2) {

	2531 SimVRegister temp1, temp2;

	2532 uxtl(vform, temp1, src1);

	2533 uxtl(vform, temp2, src2);

	2534 AbsDiff(vform, dst, temp1, temp2, false);

	2535 return dst;

	2536 }

	2537

	2538 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,

	2539 const LogicVRegister& src1,

	2540 const LogicVRegister& src2) {

	2541 SimVRegister temp1, temp2;

	2542 uxtl2(vform, temp1, src1);

	2543 uxtl2(vform, temp2, src2);

	2544 AbsDiff(vform, dst, temp1, temp2, false);

	2545 return dst;

	2546 }

	2547

	2548 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,

	2549 const LogicVRegister& src1,

	2550 const LogicVRegister& src2) {

	2551 SimVRegister temp1, temp2;

	2552 sxtl(vform, temp1, src1);

	2553 sxtl(vform, temp2, src2);

	2554 AbsDiff(vform, dst, temp1, temp2, true);

	2555 return dst;

	2556 }

	2557

	2558 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,

	2559 const LogicVRegister& src1,

	2560 const LogicVRegister& src2) {

	2561 SimVRegister temp1, temp2;

	2562 sxtl2(vform, temp1, src1);

	2563 sxtl2(vform, temp2, src2);

	2564 AbsDiff(vform, dst, temp1, temp2, true);

	2565 return dst;

	2566 }

	2567

	2568 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,

	2569 const LogicVRegister& src1,

	2570 const LogicVRegister& src2) {

	2571 SimVRegister temp1, temp2;

	2572 uxtl(vform, temp1, src1);

	2573 uxtl(vform, temp2, src2);

	2574 mul(vform, dst, temp1, temp2);

	2575 return dst;

	2576 }

	2577

	2578 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,

	2579 const LogicVRegister& src1,

	2580 const LogicVRegister& src2) {

	2581 SimVRegister temp1, temp2;

	2582 uxtl2(vform, temp1, src1);

	2583 uxtl2(vform, temp2, src2);

	2584 mul(vform, dst, temp1, temp2);

	2585 return dst;

	2586 }

	2587

	2588 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,

	2589 const LogicVRegister& src1,

	2590 const LogicVRegister& src2) {

	2591 SimVRegister temp1, temp2;

	2592 sxtl(vform, temp1, src1);

	2593 sxtl(vform, temp2, src2);

	2594 mul(vform, dst, temp1, temp2);

	2595 return dst;

	2596 }

	2597

	2598 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,

	2599 const LogicVRegister& src1,

	2600 const LogicVRegister& src2) {

	2601 SimVRegister temp1, temp2;

	2602 sxtl2(vform, temp1, src1);

	2603 sxtl2(vform, temp2, src2);

	2604 mul(vform, dst, temp1, temp2);

	2605 return dst;

	2606 }

	2607

	2608 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,

	2609 const LogicVRegister& src1,

	2610 const LogicVRegister& src2) {

	2611 SimVRegister temp1, temp2;

	2612 uxtl(vform, temp1, src1);

	2613 uxtl(vform, temp2, src2);

	2614 mls(vform, dst, temp1, temp2);

	2615 return dst;

	2616 }

	2617

	2618 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,

	2619 const LogicVRegister& src1,

	2620 const LogicVRegister& src2) {

	2621 SimVRegister temp1, temp2;

	2622 uxtl2(vform, temp1, src1);

	2623 uxtl2(vform, temp2, src2);

	2624 mls(vform, dst, temp1, temp2);

	2625 return dst;

	2626 }

	2627

	2628 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,

	2629 const LogicVRegister& src1,

	2630 const LogicVRegister& src2) {

	2631 SimVRegister temp1, temp2;

	2632 sxtl(vform, temp1, src1);

	2633 sxtl(vform, temp2, src2);

	2634 mls(vform, dst, temp1, temp2);

	2635 return dst;

	2636 }

	2637

	2638 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,

	2639 const LogicVRegister& src1,

	2640 const LogicVRegister& src2) {

	2641 SimVRegister temp1, temp2;

	2642 sxtl2(vform, temp1, src1);

	2643 sxtl2(vform, temp2, src2);

	2644 mls(vform, dst, temp1, temp2);

	2645 return dst;

	2646 }

	2647

	2648 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,

	2649 const LogicVRegister& src1,

	2650 const LogicVRegister& src2) {

	2651 SimVRegister temp1, temp2;

	2652 uxtl(vform, temp1, src1);

	2653 uxtl(vform, temp2, src2);

	2654 mla(vform, dst, temp1, temp2);

	2655 return dst;

	2656 }

	2657

	2658 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,

	2659 const LogicVRegister& src1,

	2660 const LogicVRegister& src2) {

	2661 SimVRegister temp1, temp2;

	2662 uxtl2(vform, temp1, src1);

	2663 uxtl2(vform, temp2, src2);

	2664 mla(vform, dst, temp1, temp2);

	2665 return dst;

	2666 }

	2667

	2668 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,

	2669 const LogicVRegister& src1,

	2670 const LogicVRegister& src2) {

	2671 SimVRegister temp1, temp2;

	2672 sxtl(vform, temp1, src1);

	2673 sxtl(vform, temp2, src2);

	2674 mla(vform, dst, temp1, temp2);

	2675 return dst;

	2676 }

	2677

	2678 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,

	2679 const LogicVRegister& src1,

	2680 const LogicVRegister& src2) {

	2681 SimVRegister temp1, temp2;

	2682 sxtl2(vform, temp1, src1);

	2683 sxtl2(vform, temp2, src2);

	2684 mla(vform, dst, temp1, temp2);

	2685 return dst;

	2686 }

	2687

	2688 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,

	2689 const LogicVRegister& src1,

	2690 const LogicVRegister& src2) {

	2691 SimVRegister temp;

	2692 LogicVRegister product = sqdmull(vform, temp, src1, src2);

	2693 return add(vform, dst, dst, product).SignedSaturate(vform);

	2694 }

	2695

	2696 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,

	2697 const LogicVRegister& src1,

	2698 const LogicVRegister& src2) {

	2699 SimVRegister temp;

	2700 LogicVRegister product = sqdmull2(vform, temp, src1, src2);

	2701 return add(vform, dst, dst, product).SignedSaturate(vform);

	2702 }

	2703

	2704 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,

	2705 const LogicVRegister& src1,

	2706 const LogicVRegister& src2) {

	2707 SimVRegister temp;

	2708 LogicVRegister product = sqdmull(vform, temp, src1, src2);

	2709 return sub(vform, dst, dst, product).SignedSaturate(vform);

	2710 }

	2711

	2712 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,

	2713 const LogicVRegister& src1,

	2714 const LogicVRegister& src2) {

	2715 SimVRegister temp;

	2716 LogicVRegister product = sqdmull2(vform, temp, src1, src2);

	2717 return sub(vform, dst, dst, product).SignedSaturate(vform);

	2718 }

	2719

	2720 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,

	2721 const LogicVRegister& src1,

	2722 const LogicVRegister& src2) {

	2723 SimVRegister temp;

	2724 LogicVRegister product = smull(vform, temp, src1, src2);

	2725 return add(vform, dst, product, product).SignedSaturate(vform);

	2726 }

	2727

	2728 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,

	2729 const LogicVRegister& src1,

	2730 const LogicVRegister& src2) {

	2731 SimVRegister temp;

	2732 LogicVRegister product = smull2(vform, temp, src1, src2);

	2733 return add(vform, dst, product, product).SignedSaturate(vform);

	2734 }

	2735

	2736 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,

	2737 const LogicVRegister& src1,

	2738 const LogicVRegister& src2, bool round) {

	2739 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.

	2740 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)

	2741 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.

	2742

	2743 int esize = LaneSizeInBitsFromFormat(vform);

	2744 int round_const = round ? (1 << (esize - 2)) : 0;

	2745 int64_t product;

	2746

	2747 dst.ClearForWrite(vform);

	2748 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	2749 product = src1.Int(vform, i) * src2.Int(vform, i);

	2750 product += round_const;

	2751 product = product >> (esize - 1);

	2752

	2753 if (product > MaxIntFromFormat(vform)) {

	2754 product = MaxIntFromFormat(vform);

	2755 } else if (product < MinIntFromFormat(vform)) {

	2756 product = MinIntFromFormat(vform);

	2757 }

	2758 dst.SetInt(vform, i, product);

	2759 }

	2760 return dst;

	2761 }

	2762

	2763 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,

	2764 const LogicVRegister& src1,

	2765 const LogicVRegister& src2) {

	2766 return sqrdmulh(vform, dst, src1, src2, false);

	2767 }

	2768

	2769 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,

	2770 const LogicVRegister& src1,

	2771 const LogicVRegister& src2) {

	2772 SimVRegister temp;

	2773 add(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2774 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2775 return dst;

	2776 }

	2777

	2778 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,

	2779 const LogicVRegister& src1,

	2780 const LogicVRegister& src2) {

	2781 SimVRegister temp;

	2782 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2783 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2784 return dst;

	2785 }

	2786

	2787 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,

	2788 const LogicVRegister& src1,

	2789 const LogicVRegister& src2) {

	2790 SimVRegister temp;

	2791 add(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2792 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2793 return dst;

	2794 }

	2795

	2796 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,

	2797 const LogicVRegister& src1,

	2798 const LogicVRegister& src2) {

	2799 SimVRegister temp;

	2800 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2801 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2802 return dst;

	2803 }

	2804

	2805 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,

	2806 const LogicVRegister& src1,

	2807 const LogicVRegister& src2) {

	2808 SimVRegister temp;

	2809 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2810 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2811 return dst;

	2812 }

	2813

	2814 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,

	2815 const LogicVRegister& src1,

	2816 const LogicVRegister& src2) {

	2817 SimVRegister temp;

	2818 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2819 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2820 return dst;

	2821 }

	2822

	2823 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,

	2824 const LogicVRegister& src1,

	2825 const LogicVRegister& src2) {

	2826 SimVRegister temp;

	2827 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);

	2828 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2829 return dst;

	2830 }

	2831

	2832 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,

	2833 const LogicVRegister& src1,

	2834 const LogicVRegister& src2) {

	2835 SimVRegister temp;

	2836 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);

	2837 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));

	2838 return dst;

	2839 }

	2840

	2841 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,

	2842 const LogicVRegister& src1,

	2843 const LogicVRegister& src2) {

	2844 uint64_t result[16];

	2845 int laneCount = LaneCountFromFormat(vform);

	2846 int pairs = laneCount / 2;

	2847 for (int i = 0; i < pairs; ++i) {

	2848 result[2 * i] = src1.Uint(vform, 2 * i);

	2849 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);

	2850 }

	2851

	2852 dst.SetUintArray(vform, result);

	2853 return dst;

	2854 }

	2855

	2856 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,

	2857 const LogicVRegister& src1,

	2858 const LogicVRegister& src2) {

	2859 uint64_t result[16];

	2860 int laneCount = LaneCountFromFormat(vform);

	2861 int pairs = laneCount / 2;

	2862 for (int i = 0; i < pairs; ++i) {

	2863 result[2 * i] = src1.Uint(vform, (2 * i) + 1);

	2864 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);

	2865 }

	2866

	2867 dst.SetUintArray(vform, result);

	2868 return dst;

	2869 }

	2870

	2871 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,

	2872 const LogicVRegister& src1,

	2873 const LogicVRegister& src2) {

	2874 uint64_t result[16];

	2875 int laneCount = LaneCountFromFormat(vform);

	2876 int pairs = laneCount / 2;

	2877 for (int i = 0; i < pairs; ++i) {

	2878 result[2 * i] = src1.Uint(vform, i);

	2879 result[(2 * i) + 1] = src2.Uint(vform, i);

	2880 }

	2881

	2882 dst.SetUintArray(vform, result);

	2883 return dst;

	2884 }

	2885

	2886 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,

	2887 const LogicVRegister& src1,

	2888 const LogicVRegister& src2) {

	2889 uint64_t result[16];

	2890 int laneCount = LaneCountFromFormat(vform);

	2891 int pairs = laneCount / 2;

	2892 for (int i = 0; i < pairs; ++i) {

	2893 result[2 * i] = src1.Uint(vform, pairs + i);

	2894 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);

	2895 }

	2896

	2897 dst.SetUintArray(vform, result);

	2898 return dst;

	2899 }

	2900

	2901 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,

	2902 const LogicVRegister& src1,

	2903 const LogicVRegister& src2) {

	2904 uint64_t result[32];

	2905 int laneCount = LaneCountFromFormat(vform);

	2906 for (int i = 0; i < laneCount; ++i) {

	2907 result[i] = src1.Uint(vform, i);

	2908 result[laneCount + i] = src2.Uint(vform, i);

	2909 }

	2910

	2911 dst.ClearForWrite(vform);

	2912 for (int i = 0; i < laneCount; ++i) {

	2913 dst.SetUint(vform, i, result[2 * i]);

	2914 }

	2915 return dst;

	2916 }

	2917

	2918 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,

	2919 const LogicVRegister& src1,

	2920 const LogicVRegister& src2) {

	2921 uint64_t result[32];

	2922 int laneCount = LaneCountFromFormat(vform);

	2923 for (int i = 0; i < laneCount; ++i) {

	2924 result[i] = src1.Uint(vform, i);

	2925 result[laneCount + i] = src2.Uint(vform, i);

	2926 }

	2927

	2928 dst.ClearForWrite(vform);

	2929 for (int i = 0; i < laneCount; ++i) {

	2930 dst.SetUint(vform, i, result[(2 * i) + 1]);

	2931 }

	2932 return dst;

	2933 }

	2934

	2935 template <typename T>

	2936 T Simulator::FPAdd(T op1, T op2) {

	2937 T result = FPProcessNaNs(op1, op2);

	2938 if (std::isnan(result)) return result;

	2939

	2940 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {

	2941 // inf + -inf returns the default NaN.

	2942 FPProcessException();

	2943 return FPDefaultNaN<T>();

	2944 } else {

	2945 // Other cases should be handled by standard arithmetic.

	2946 return op1 + op2;

	2947 }

	2948 }

	2949

	2950 template <typename T>

	2951 T Simulator::FPSub(T op1, T op2) {

	2952 // NaNs should be handled elsewhere.

	2953 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	2954

	2955 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {

	2956 // inf - inf returns the default NaN.

	2957 FPProcessException();

	2958 return FPDefaultNaN<T>();

	2959 } else {

	2960 // Other cases should be handled by standard arithmetic.

	2961 return op1 - op2;

	2962 }

	2963 }

	2964

	2965 template <typename T>

	2966 T Simulator::FPMul(T op1, T op2) {

	2967 // NaNs should be handled elsewhere.

	2968 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	2969

	2970 if ((std::isinf(op1) && (op2 == 0.0)) \|\| (std::isinf(op2) && (op1 == 0.0))) {

	2971 // inf * 0.0 returns the default NaN.

	2972 FPProcessException();

	2973 return FPDefaultNaN<T>();

	2974 } else {

	2975 // Other cases should be handled by standard arithmetic.

	2976 return op1 * op2;

	2977 }

	2978 }

	2979

	2980 template <typename T>

	2981 T Simulator::FPMulx(T op1, T op2) {

	2982 if ((std::isinf(op1) && (op2 == 0.0)) \|\| (std::isinf(op2) && (op1 == 0.0))) {

	2983 // inf * 0.0 returns +/-2.0.

	2984 T two = 2.0;

	2985 return copysign(1.0, op1) * copysign(1.0, op2) * two;

	2986 }

	2987 return FPMul(op1, op2);

	2988 }

	2989

	2990 template <typename T>

	2991 T Simulator::FPMulAdd(T a, T op1, T op2) {

	2992 T result = FPProcessNaNs3(a, op1, op2);

	2993

	2994 T sign_a = copysign(1.0, a);

	2995 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);

	2996 bool isinf_prod = std::isinf(op1) \|\| std::isinf(op2);

	2997 bool operation_generates_nan =

	2998 (std::isinf(op1) && (op2 == 0.0)) \|\| // inf * 0.0

	2999 (std::isinf(op2) && (op1 == 0.0)) \|\| // 0.0 * inf

	3000 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf

	3001

	3002 if (std::isnan(result)) {

	3003 // Generated NaNs override quiet NaNs propagated from a.

	3004 if (operation_generates_nan && IsQuietNaN(a)) {

	3005 FPProcessException();

	3006 return FPDefaultNaN<T>();

	3007 } else {

	3008 return result;

	3009 }

	3010 }

	3011

	3012 // If the operation would produce a NaN, return the default NaN.

	3013 if (operation_generates_nan) {

	3014 FPProcessException();

	3015 return FPDefaultNaN<T>();

	3016 }

	3017

	3018 // Work around broken fma implementations for exact zero results: The sign of

	3019 // exact 0.0 results is positive unless both a and op1 * op2 are negative.

	3020 if (((op1 == 0.0) \|\| (op2 == 0.0)) && (a == 0.0)) {

	3021 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;

	3022 }

	3023

	3024 result = FusedMultiplyAdd(op1, op2, a);

	3025 DCHECK(!std::isnan(result));

	3026

	3027 // Work around broken fma implementations for rounded zero results: If a is

	3028 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.

	3029 if ((a == 0.0) && (result == 0.0)) {

	3030 return copysign(0.0, sign_prod);

	3031 }

	3032

	3033 return result;

	3034 }

	3035

	3036 template <typename T>

	3037 T Simulator::FPDiv(T op1, T op2) {

	3038 // NaNs should be handled elsewhere.

	3039 DCHECK(!std::isnan(op1) && !std::isnan(op2));

	3040

	3041 if ((std::isinf(op1) && std::isinf(op2)) \|\| ((op1 == 0.0) && (op2 == 0.0))) {

	3042 // inf / inf and 0.0 / 0.0 return the default NaN.

	3043 FPProcessException();

	3044 return FPDefaultNaN<T>();

	3045 } else {

	3046 if (op2 == 0.0) {

	3047 FPProcessException();

	3048 if (!std::isnan(op1)) {

	3049 double op1_sign = copysign(1.0, op1);

	3050 double op2_sign = copysign(1.0, op2);

	3051 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);

	3052 }

	3053 }

	3054

	3055 // Other cases should be handled by standard arithmetic.

	3056 return op1 / op2;

	3057 }

	3058 }

	3059

	3060 template <typename T>

	3061 T Simulator::FPSqrt(T op) {

	3062 if (std::isnan(op)) {

	3063 return FPProcessNaN(op);

	3064 } else if (op < 0.0) {

	3065 FPProcessException();

	3066 return FPDefaultNaN<T>();

	3067 } else {

	3068 return sqrt(op);

	3069 }

	3070 }

	3071

	3072 template <typename T>

	3073 T Simulator::FPMax(T a, T b) {

	3074 T result = FPProcessNaNs(a, b);

	3075 if (std::isnan(result)) return result;

	3076

	3077 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {

	3078 // a and b are zero, and the sign differs: return +0.0.

	3079 return 0.0;

	3080 } else {

	3081 return (a > b) ? a : b;

	3082 }

	3083 }

	3084

	3085 template <typename T>

	3086 T Simulator::FPMaxNM(T a, T b) {

	3087 if (IsQuietNaN(a) && !IsQuietNaN(b)) {

	3088 a = kFP64NegativeInfinity;

	3089 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {

	3090 b = kFP64NegativeInfinity;

	3091 }

	3092

	3093 T result = FPProcessNaNs(a, b);

	3094 return std::isnan(result) ? result : FPMax(a, b);

	3095 }

	3096

	3097 template <typename T>

	3098 T Simulator::FPMin(T a, T b) {

	3099 T result = FPProcessNaNs(a, b);

	3100 if (std::isnan(result)) return result;

	3101

	3102 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {

	3103 // a and b are zero, and the sign differs: return -0.0.

	3104 return -0.0;

	3105 } else {

	3106 return (a < b) ? a : b;

	3107 }

	3108 }

	3109

	3110 template <typename T>

	3111 T Simulator::FPMinNM(T a, T b) {

	3112 if (IsQuietNaN(a) && !IsQuietNaN(b)) {

	3113 a = kFP64PositiveInfinity;

	3114 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {

	3115 b = kFP64PositiveInfinity;

	3116 }

	3117

	3118 T result = FPProcessNaNs(a, b);

	3119 return std::isnan(result) ? result : FPMin(a, b);

	3120 }

	3121

	3122 template <typename T>

	3123 T Simulator::FPRecipStepFused(T op1, T op2) {

	3124 const T two = 2.0;

	3125 if ((std::isinf(op1) && (op2 == 0.0)) \|\|

	3126 ((op1 == 0.0) && (std::isinf(op2)))) {

	3127 return two;

	3128 } else if (std::isinf(op1) \|\| std::isinf(op2)) {

	3129 // Return +inf if signs match, otherwise -inf.

	3130 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity

	3131 : kFP64NegativeInfinity;

	3132 } else {

	3133 return FusedMultiplyAdd(op1, op2, two);

	3134 }

	3135 }

	3136

	3137 template <typename T>

	3138 T Simulator::FPRSqrtStepFused(T op1, T op2) {

	3139 const T one_point_five = 1.5;

	3140 const T two = 2.0;

	3141

	3142 if ((std::isinf(op1) && (op2 == 0.0)) \|\|

	3143 ((op1 == 0.0) && (std::isinf(op2)))) {

	3144 return one_point_five;

	3145 } else if (std::isinf(op1) \|\| std::isinf(op2)) {

	3146 // Return +inf if signs match, otherwise -inf.

	3147 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity

	3148 : kFP64NegativeInfinity;

	3149 } else {

	3150 // The multiply-add-halve operation must be fully fused, so avoid interim

	3151 // rounding by checking which operand can be losslessly divided by two

	3152 // before doing the multiply-add.

	3153 if (std::isnormal(op1 / two)) {

	3154 return FusedMultiplyAdd(op1 / two, op2, one_point_five);

	3155 } else if (std::isnormal(op2 / two)) {

	3156 return FusedMultiplyAdd(op1, op2 / two, one_point_five);

	3157 } else {

	3158 // Neither operand is normal after halving: the result is dominated by

	3159 // the addition term, so just return that.

	3160 return one_point_five;

	3161 }

	3162 }

	3163 }

	3164

	3165 double Simulator::FPRoundInt(double value, FPRounding round_mode) {

	3166 if ((value == 0.0) \|\| (value == kFP64PositiveInfinity) \|\|

	3167 (value == kFP64NegativeInfinity)) {

	3168 return value;

	3169 } else if (std::isnan(value)) {

	3170 return FPProcessNaN(value);

	3171 }

	3172

	3173 double int_result = std::floor(value);

	3174 double error = value - int_result;

	3175 switch (round_mode) {

	3176 case FPTieAway: {

	3177 // Take care of correctly handling the range ]-0.5, -0.0], which must

	3178 // yield -0.0.

	3179 if ((-0.5 < value) && (value < 0.0)) {

	3180 int_result = -0.0;

	3181

	3182 } else if ((error > 0.5) \|\| ((error == 0.5) && (int_result >= 0.0))) {

	3183 // If the error is greater than 0.5, or is equal to 0.5 and the integer

	3184 // result is positive, round up.

	3185 int_result++;

	3186 }

	3187 break;

	3188 }

	3189 case FPTieEven: {

	3190 // Take care of correctly handling the range [-0.5, -0.0], which must

	3191 // yield -0.0.

	3192 if ((-0.5 <= value) && (value < 0.0)) {

	3193 int_result = -0.0;

	3194

	3195 // If the error is greater than 0.5, or is equal to 0.5 and the integer

	3196 // result is odd, round up.

	3197 } else if ((error > 0.5) \|\|

	3198 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {

	3199 int_result++;

	3200 }

	3201 break;

	3202 }

	3203 case FPZero: {

	3204 // If value>0 then we take floor(value)

	3205 // otherwise, ceil(value).

	3206 if (value < 0) {

	3207 int_result = ceil(value);

	3208 }

	3209 break;

	3210 }

	3211 case FPNegativeInfinity: {

	3212 // We always use floor(value).

	3213 break;

	3214 }

	3215 case FPPositiveInfinity: {

	3216 // Take care of correctly handling the range ]-1.0, -0.0], which must

	3217 // yield -0.0.

	3218 if ((-1.0 < value) && (value < 0.0)) {

	3219 int_result = -0.0;

	3220

	3221 // If the error is non-zero, round up.

	3222 } else if (error > 0.0) {

	3223 int_result++;

	3224 }

	3225 break;

	3226 }

	3227 default:

	3228 UNIMPLEMENTED();

	3229 }

	3230 return int_result;

	3231 }

	3232

	3233 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {

	3234 value = FPRoundInt(value, rmode);

	3235 if (value >= kWMaxInt) {

	3236 return kWMaxInt;

	3237 } else if (value < kWMinInt) {

	3238 return kWMinInt;

	3239 }

	3240 return std::isnan(value) ? 0 : static_cast<int32_t>(value);

	3241 }

	3242

	3243 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {

	3244 value = FPRoundInt(value, rmode);

	3245 if (value >= kXMaxInt) {

	3246 return kXMaxInt;

	3247 } else if (value < kXMinInt) {

	3248 return kXMinInt;

	3249 }

	3250 return std::isnan(value) ? 0 : static_cast<int64_t>(value);

	3251 }

	3252

	3253 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {

	3254 value = FPRoundInt(value, rmode);

	3255 if (value >= kWMaxUInt) {

	3256 return kWMaxUInt;

	3257 } else if (value < 0.0) {

	3258 return 0;

	3259 }

	3260 return std::isnan(value) ? 0 : static_cast<uint32_t>(value);

	3261 }

	3262

	3263 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {

	3264 value = FPRoundInt(value, rmode);

	3265 if (value >= kXMaxUInt) {

	3266 return kXMaxUInt;

	3267 } else if (value < 0.0) {

	3268 return 0;

	3269 }

	3270 return std::isnan(value) ? 0 : static_cast<uint64_t>(value);

	3271 }

	3272

	3273 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \

	3274 template <typename T> \

	3275 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \

	3276 const LogicVRegister& src1, \

	3277 const LogicVRegister& src2) { \

	3278 dst.ClearForWrite(vform); \

	3279 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \

	3280 T op1 = src1.Float<T>(i); \

	3281 T op2 = src2.Float<T>(i); \

	3282 T result; \

	3283 if (PROCNAN) { \

	3284 result = FPProcessNaNs(op1, op2); \

	3285 if (!std::isnan(result)) { \

	3286 result = OP(op1, op2); \

	3287 } \

	3288 } else { \

	3289 result = OP(op1, op2); \

	3290 } \

	3291 dst.SetFloat(i, result); \

	3292 } \

	3293 return dst; \

	3294 } \

	3295 \

	3296 LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \

	3297 const LogicVRegister& src1, \

	3298 const LogicVRegister& src2) { \

	3299 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \

	3300 FN<float>(vform, dst, src1, src2); \

	3301 } else { \

	3302 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \

	3303 FN<double>(vform, dst, src1, src2); \

	3304 } \

	3305 return dst; \

	3306 }

	3307 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)

	3308 #undef DEFINE_NEON_FP_VECTOR_OP

	3309

	3310 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,

	3311 const LogicVRegister& src1,

	3312 const LogicVRegister& src2) {

	3313 SimVRegister temp;

	3314 LogicVRegister product = fmul(vform, temp, src1, src2);

	3315 return fneg(vform, dst, product);

	3316 }

	3317

	3318 template <typename T>

	3319 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,

	3320 const LogicVRegister& src1,

	3321 const LogicVRegister& src2) {

	3322 dst.ClearForWrite(vform);

	3323 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3324 T op1 = -src1.Float<T>(i);

	3325 T op2 = src2.Float<T>(i);

	3326 T result = FPProcessNaNs(op1, op2);

	3327 dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));

	3328 }

	3329 return dst;

	3330 }

	3331

	3332 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,

	3333 const LogicVRegister& src1,

	3334 const LogicVRegister& src2) {

	3335 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3336 frecps<float>(vform, dst, src1, src2);

	3337 } else {

	3338 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3339 frecps<double>(vform, dst, src1, src2);

	3340 }

	3341 return dst;

	3342 }

	3343

	3344 template <typename T>

	3345 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,

	3346 const LogicVRegister& src1,

	3347 const LogicVRegister& src2) {

	3348 dst.ClearForWrite(vform);

	3349 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3350 T op1 = -src1.Float<T>(i);

	3351 T op2 = src2.Float<T>(i);

	3352 T result = FPProcessNaNs(op1, op2);

	3353 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));

	3354 }

	3355 return dst;

	3356 }

	3357

	3358 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,

	3359 const LogicVRegister& src1,

	3360 const LogicVRegister& src2) {

	3361 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3362 frsqrts<float>(vform, dst, src1, src2);

	3363 } else {

	3364 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3365 frsqrts<double>(vform, dst, src1, src2);

	3366 }

	3367 return dst;

	3368 }

	3369

	3370 template <typename T>

	3371 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,

	3372 const LogicVRegister& src1,

	3373 const LogicVRegister& src2, Condition cond) {

	3374 dst.ClearForWrite(vform);

	3375 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3376 bool result = false;

	3377 T op1 = src1.Float<T>(i);

	3378 T op2 = src2.Float<T>(i);

	3379 T nan_result = FPProcessNaNs(op1, op2);

	3380 if (!std::isnan(nan_result)) {

	3381 switch (cond) {

	3382 case eq:

	3383 result = (op1 == op2);

	3384 break;

	3385 case ge:

	3386 result = (op1 >= op2);

	3387 break;

	3388 case gt:

	3389 result = (op1 > op2);

	3390 break;

	3391 case le:

	3392 result = (op1 <= op2);

	3393 break;

	3394 case lt:

	3395 result = (op1 < op2);

	3396 break;

	3397 default:

	3398 UNREACHABLE();

	3399 break;

	3400 }

	3401 }

	3402 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);

	3403 }

	3404 return dst;

	3405 }

	3406

	3407 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,

	3408 const LogicVRegister& src1,

	3409 const LogicVRegister& src2, Condition cond) {

	3410 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3411 fcmp<float>(vform, dst, src1, src2, cond);

	3412 } else {

	3413 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3414 fcmp<double>(vform, dst, src1, src2, cond);

	3415 }

	3416 return dst;

	3417 }

	3418

	3419 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,

	3420 const LogicVRegister& src, Condition cond) {

	3421 SimVRegister temp;

	3422 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3423 LogicVRegister zero_reg =

	3424 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));

	3425 fcmp<float>(vform, dst, src, zero_reg, cond);

	3426 } else {

	3427 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3428 LogicVRegister zero_reg =

	3429 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));

	3430 fcmp<double>(vform, dst, src, zero_reg, cond);

	3431 }

	3432 return dst;

	3433 }

	3434

	3435 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,

	3436 const LogicVRegister& src1,

	3437 const LogicVRegister& src2, Condition cond) {

	3438 SimVRegister temp1, temp2;

	3439 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3440 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);

	3441 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);

	3442 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);

	3443 } else {

	3444 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3445 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);

	3446 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);

	3447 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);

	3448 }

	3449 return dst;

	3450 }

	3451

	3452 template <typename T>

	3453 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3454 const LogicVRegister& src1,

	3455 const LogicVRegister& src2) {

	3456 dst.ClearForWrite(vform);

	3457 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3458 T op1 = src1.Float<T>(i);

	3459 T op2 = src2.Float<T>(i);

	3460 T acc = dst.Float<T>(i);

	3461 T result = FPMulAdd(acc, op1, op2);

	3462 dst.SetFloat(i, result);

	3463 }

	3464 return dst;

	3465 }

	3466

	3467 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3468 const LogicVRegister& src1,

	3469 const LogicVRegister& src2) {

	3470 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3471 fmla<float>(vform, dst, src1, src2);

	3472 } else {

	3473 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3474 fmla<double>(vform, dst, src1, src2);

	3475 }

	3476 return dst;

	3477 }

	3478

	3479 template <typename T>

	3480 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3481 const LogicVRegister& src1,

	3482 const LogicVRegister& src2) {

	3483 dst.ClearForWrite(vform);

	3484 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3485 T op1 = -src1.Float<T>(i);

	3486 T op2 = src2.Float<T>(i);

	3487 T acc = dst.Float<T>(i);

	3488 T result = FPMulAdd(acc, op1, op2);

	3489 dst.SetFloat(i, result);

	3490 }

	3491 return dst;

	3492 }

	3493

	3494 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3495 const LogicVRegister& src1,

	3496 const LogicVRegister& src2) {

	3497 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3498 fmls<float>(vform, dst, src1, src2);

	3499 } else {

	3500 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3501 fmls<double>(vform, dst, src1, src2);

	3502 }

	3503 return dst;

	3504 }

	3505

	3506 template <typename T>

	3507 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,

	3508 const LogicVRegister& src) {

	3509 dst.ClearForWrite(vform);

	3510 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3511 T op = src.Float<T>(i);

	3512 op = -op;

	3513 dst.SetFloat(i, op);

	3514 }

	3515 return dst;

	3516 }

	3517

	3518 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,

	3519 const LogicVRegister& src) {

	3520 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3521 fneg<float>(vform, dst, src);

	3522 } else {

	3523 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3524 fneg<double>(vform, dst, src);

	3525 }

	3526 return dst;

	3527 }

	3528

	3529 template <typename T>

	3530 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,

	3531 const LogicVRegister& src) {

	3532 dst.ClearForWrite(vform);

	3533 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3534 T op = src.Float<T>(i);

	3535 if (copysign(1.0, op) < 0.0) {

	3536 op = -op;

	3537 }

	3538 dst.SetFloat(i, op);

	3539 }

	3540 return dst;

	3541 }

	3542

	3543 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,

	3544 const LogicVRegister& src) {

	3545 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3546 fabs_<float>(vform, dst, src);

	3547 } else {

	3548 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3549 fabs_<double>(vform, dst, src);

	3550 }

	3551 return dst;

	3552 }

	3553

	3554 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,

	3555 const LogicVRegister& src1,

	3556 const LogicVRegister& src2) {

	3557 SimVRegister temp;

	3558 fsub(vform, temp, src1, src2);

	3559 fabs_(vform, dst, temp);

	3560 return dst;

	3561 }

	3562

	3563 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,

	3564 const LogicVRegister& src) {

	3565 dst.ClearForWrite(vform);

	3566 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3567 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3568 float result = FPSqrt(src.Float<float>(i));

	3569 dst.SetFloat(i, result);

	3570 }

	3571 } else {

	3572 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3573 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3574 double result = FPSqrt(src.Float<double>(i));

	3575 dst.SetFloat(i, result);

	3576 }

	3577 }

	3578 return dst;

	3579 }

	3580

	3581 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \

	3582 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \

	3583 const LogicVRegister& src1, \

	3584 const LogicVRegister& src2) { \

	3585 SimVRegister temp1, temp2; \

	3586 uzp1(vform, temp1, src1, src2); \

	3587 uzp2(vform, temp2, src1, src2); \

	3588 FN(vform, dst, temp1, temp2); \

	3589 return dst; \

	3590 } \

	3591 \

	3592 LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \

	3593 const LogicVRegister& src) { \

	3594 if (vform == kFormatS) { \

	3595 float result = OP(src.Float<float>(0), src.Float<float>(1)); \

	3596 dst.SetFloat(0, result); \

	3597 } else { \

	3598 DCHECK_EQ(vform, kFormatD); \

	3599 double result = OP(src.Float<double>(0), src.Float<double>(1)); \

	3600 dst.SetFloat(0, result); \

	3601 } \

	3602 dst.ClearForWrite(vform); \

	3603 return dst; \

	3604 }

	3605 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)

	3606 #undef DEFINE_NEON_FP_PAIR_OP

	3607

	3608 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,

	3609 const LogicVRegister& src, FPMinMaxOp Op) {

	3610 DCHECK_EQ(vform, kFormat4S);

	3611 USE(vform);

	3612 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));

	3613 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));

	3614 float result = (this->*Op)(result1, result2);

	3615 dst.ClearForWrite(kFormatS);

	3616 dst.SetFloat<float>(0, result);

	3617 return dst;

	3618 }

	3619

	3620 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,

	3621 const LogicVRegister& src) {

	3622 return FMinMaxV(vform, dst, src, &Simulator::FPMax);

	3623 }

	3624

	3625 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,

	3626 const LogicVRegister& src) {

	3627 return FMinMaxV(vform, dst, src, &Simulator::FPMin);

	3628 }

	3629

	3630 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,

	3631 const LogicVRegister& src) {

	3632 return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);

	3633 }

	3634

	3635 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,

	3636 const LogicVRegister& src) {

	3637 return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);

	3638 }

	3639

	3640 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,

	3641 const LogicVRegister& src1,

	3642 const LogicVRegister& src2, int index) {

	3643 dst.ClearForWrite(vform);

	3644 SimVRegister temp;

	3645 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3646 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3647 fmul<float>(vform, dst, src1, index_reg);

	3648 } else {

	3649 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3650 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3651 fmul<double>(vform, dst, src1, index_reg);

	3652 }

	3653 return dst;

	3654 }

	3655

	3656 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,

	3657 const LogicVRegister& src1,

	3658 const LogicVRegister& src2, int index) {

	3659 dst.ClearForWrite(vform);

	3660 SimVRegister temp;

	3661 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3662 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3663 fmla<float>(vform, dst, src1, index_reg);

	3664 } else {

	3665 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3666 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3667 fmla<double>(vform, dst, src1, index_reg);

	3668 }

	3669 return dst;

	3670 }

	3671

	3672 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,

	3673 const LogicVRegister& src1,

	3674 const LogicVRegister& src2, int index) {

	3675 dst.ClearForWrite(vform);

	3676 SimVRegister temp;

	3677 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3678 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3679 fmls<float>(vform, dst, src1, index_reg);

	3680 } else {

	3681 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3682 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3683 fmls<double>(vform, dst, src1, index_reg);

	3684 }

	3685 return dst;

	3686 }

	3687

	3688 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,

	3689 const LogicVRegister& src1,

	3690 const LogicVRegister& src2, int index) {

	3691 dst.ClearForWrite(vform);

	3692 SimVRegister temp;

	3693 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3694 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);

	3695 fmulx<float>(vform, dst, src1, index_reg);

	3696

	3697 } else {

	3698 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3699 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);

	3700 fmulx<double>(vform, dst, src1, index_reg);

	3701 }

	3702 return dst;

	3703 }

	3704

	3705 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,

	3706 const LogicVRegister& src,

	3707 FPRounding rounding_mode,

	3708 bool inexact_exception) {

	3709 dst.ClearForWrite(vform);

	3710 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3711 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3712 float input = src.Float<float>(i);

	3713 float rounded = FPRoundInt(input, rounding_mode);

	3714 if (inexact_exception && !std::isnan(input) && (input != rounded)) {

	3715 FPProcessException();

	3716 }

	3717 dst.SetFloat<float>(i, rounded);

	3718 }

	3719 } else {

	3720 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3721 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3722 double input = src.Float<double>(i);

	3723 double rounded = FPRoundInt(input, rounding_mode);

	3724 if (inexact_exception && !std::isnan(input) && (input != rounded)) {

	3725 FPProcessException();

	3726 }

	3727 dst.SetFloat<double>(i, rounded);

	3728 }

	3729 }

	3730 return dst;

	3731 }

	3732

	3733 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,

	3734 const LogicVRegister& src,

	3735 FPRounding rounding_mode, int fbits) {

	3736 dst.ClearForWrite(vform);

	3737 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3738 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3739 float op = src.Float<float>(i) * std::pow(2.0f, fbits);

	3740 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));

	3741 }

	3742 } else {

	3743 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3744 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3745 double op = src.Float<double>(i) * std::pow(2.0, fbits);

	3746 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));

	3747 }

	3748 }

	3749 return dst;

	3750 }

	3751

	3752 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,

	3753 const LogicVRegister& src,

	3754 FPRounding rounding_mode, int fbits) {

	3755 dst.ClearForWrite(vform);

	3756 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3757 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3758 float op = src.Float<float>(i) * std::pow(2.0f, fbits);

	3759 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));

	3760 }

	3761 } else {

	3762 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3763 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3764 double op = src.Float<double>(i) * std::pow(2.0, fbits);

	3765 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));

	3766 }

	3767 }

	3768 return dst;

	3769 }

	3770

	3771 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,

	3772 const LogicVRegister& src) {

	3773 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3774 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {

	3775 dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));

	3776 }

	3777 } else {

	3778 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3779 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {

	3780 dst.SetFloat(i, FPToDouble(src.Float<float>(i)));

	3781 }

	3782 }

	3783 return dst;

	3784 }

	3785

	3786 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,

	3787 const LogicVRegister& src) {

	3788 int lane_count = LaneCountFromFormat(vform);

	3789 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3790 for (int i = 0; i < lane_count; i++) {

	3791 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));

	3792 }

	3793 } else {

	3794 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3795 for (int i = 0; i < lane_count; i++) {

	3796 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));

	3797 }

	3798 }

	3799 return dst;

	3800 }

	3801

	3802 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,

	3803 const LogicVRegister& src) {

	3804 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {

	3805 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3806 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));

	3807 }

	3808 } else {

	3809 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3810 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3811 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));

	3812 }

	3813 }

	3814 return dst;

	3815 }

	3816

	3817 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,

	3818 const LogicVRegister& src) {

	3819 int lane_count = LaneCountFromFormat(vform) / 2;

	3820 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {

	3821 for (int i = lane_count - 1; i >= 0; i--) {

	3822 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));

	3823 }

	3824 } else {

	3825 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3826 for (int i = lane_count - 1; i >= 0; i--) {

	3827 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));

	3828 }

	3829 }

	3830 return dst;

	3831 }

	3832

	3833 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,

	3834 const LogicVRegister& src) {

	3835 dst.ClearForWrite(vform);

	3836 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3837 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3838 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));

	3839 }

	3840 return dst;

	3841 }

	3842

	3843 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,

	3844 const LogicVRegister& src) {

	3845 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);

	3846 int lane_count = LaneCountFromFormat(vform) / 2;

	3847 for (int i = lane_count - 1; i >= 0; i--) {

	3848 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));

	3849 }

	3850 return dst;

	3851 }

	3852

	3853 // Based on reference C function recip_sqrt_estimate from ARM ARM.

	3854 double Simulator::recip_sqrt_estimate(double a) {

	3855 int q0, q1, s;

	3856 double r;

	3857 if (a < 0.5) {

	3858 q0 = static_cast<int>(a * 512.0);

	3859 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);

	3860 } else {

	3861 q1 = static_cast<int>(a * 256.0);

	3862 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);

	3863 }

	3864 s = static_cast<int>(256.0 * r + 0.5);

	3865 return static_cast<double>(s) / 256.0;

	3866 }

	3867

	3868 namespace {

	3869

	3870 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {

	3871 return unsigned_bitextract_64(start_bit, end_bit, val);

	3872 }

	3873

	3874 } // anonymous namespace

	3875

	3876 template <typename T>

	3877 T Simulator::FPRecipSqrtEstimate(T op) {

	3878 static_assert(std::is_same<float, T>::value \|\| std::is_same<double, T>::value,

	3879 "T must be a float or double");

	3880

	3881 if (std::isnan(op)) {

	3882 return FPProcessNaN(op);

	3883 } else if (op == 0.0) {

	3884 if (copysign(1.0, op) < 0.0) {

	3885 return kFP64NegativeInfinity;

	3886 } else {

	3887 return kFP64PositiveInfinity;

	3888 }

	3889 } else if (copysign(1.0, op) < 0.0) {

	3890 FPProcessException();

	3891 return FPDefaultNaN<T>();

	3892 } else if (std::isinf(op)) {

	3893 return 0.0;

	3894 } else {

	3895 uint64_t fraction;

	3896 int32_t exp, result_exp;

	3897

	3898 if (sizeof(T) == sizeof(float)) {

	3899 exp = static_cast<int32_t>(float_exp(op));

	3900 fraction = float_mantissa(op);

	3901 fraction <<= 29;

	3902 } else {

	3903 exp = static_cast<int32_t>(double_exp(op));

	3904 fraction = double_mantissa(op);

	3905 }

	3906

	3907 if (exp == 0) {

	3908 while (Bits(fraction, 51, 51) == 0) {

	3909 fraction = Bits(fraction, 50, 0) << 1;

	3910 exp -= 1;

	3911 }

	3912 fraction = Bits(fraction, 50, 0) << 1;

	3913 }

	3914

	3915 double scaled;

	3916 if (Bits(exp, 0, 0) == 0) {

	3917 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);

	3918 } else {

	3919 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);

	3920 }

	3921

	3922 if (sizeof(T) == sizeof(float)) {

	3923 result_exp = (380 - exp) / 2;

	3924 } else {

	3925 result_exp = (3068 - exp) / 2;

	3926 }

	3927

	3928 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));

	3929

	3930 if (sizeof(T) == sizeof(float)) {

	3931 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));

	3932 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));

	3933 return float_pack(0, exp_bits, est_bits);

	3934 } else {

	3935 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));

	3936 }

	3937 }

	3938 }

	3939

	3940 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,

	3941 const LogicVRegister& src) {

	3942 dst.ClearForWrite(vform);

	3943 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	3944 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3945 float input = src.Float<float>(i);

	3946 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));

	3947 }

	3948 } else {

	3949 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	3950 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	3951 double input = src.Float<double>(i);

	3952 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));

	3953 }

	3954 }

	3955 return dst;

	3956 }

	3957

	3958 template <typename T>

	3959 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {

	3960 static_assert(std::is_same<float, T>::value \|\| std::is_same<double, T>::value,

	3961 "T must be a float or double");

	3962 uint32_t sign;

	3963

	3964 if (sizeof(T) == sizeof(float)) {

	3965 sign = float_sign(op);

	3966 } else {

	3967 sign = double_sign(op);

	3968 }

	3969

	3970 if (std::isnan(op)) {

	3971 return FPProcessNaN(op);

	3972 } else if (std::isinf(op)) {

	3973 return (sign == 1) ? -0.0 : 0.0;

	3974 } else if (op == 0.0) {

	3975 FPProcessException(); // FPExc_DivideByZero exception.

	3976 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;

	3977 } else if (((sizeof(T) == sizeof(float)) &&

	3978 (std::fabs(op) < std::pow(2.0, -128.0))) \|\|

	3979 ((sizeof(T) == sizeof(double)) &&

	3980 (std::fabs(op) < std::pow(2.0, -1024.0)))) {

	3981 bool overflow_to_inf = false;

	3982 switch (rounding) {

	3983 case FPTieEven:

	3984 overflow_to_inf = true;

	3985 break;

	3986 case FPPositiveInfinity:

	3987 overflow_to_inf = (sign == 0);

	3988 break;

	3989 case FPNegativeInfinity:

	3990 overflow_to_inf = (sign == 1);

	3991 break;

	3992 case FPZero:

	3993 overflow_to_inf = false;

	3994 break;

	3995 default:

	3996 break;

	3997 }

	3998 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.

	3999 if (overflow_to_inf) {

	4000 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;

	4001 } else {

	4002 // Return FPMaxNormal(sign).

	4003 if (sizeof(T) == sizeof(float)) {

	4004 return float_pack(sign, 0xfe, 0x07fffff);

	4005 } else {

	4006 return double_pack(sign, 0x7fe, 0x0fffffffffffffl);

	4007 }

	4008 }

	4009 } else {

	4010 uint64_t fraction;

	4011 int32_t exp, result_exp;

	4012 uint32_t sign;

	4013

	4014 if (sizeof(T) == sizeof(float)) {

	4015 sign = float_sign(op);

	4016 exp = static_cast<int32_t>(float_exp(op));

	4017 fraction = float_mantissa(op);

	4018 fraction <<= 29;

	4019 } else {

	4020 sign = double_sign(op);

	4021 exp = static_cast<int32_t>(double_exp(op));

	4022 fraction = double_mantissa(op);

	4023 }

	4024

	4025 if (exp == 0) {

	4026 if (Bits(fraction, 51, 51) == 0) {

	4027 exp -= 1;

	4028 fraction = Bits(fraction, 49, 0) << 2;

	4029 } else {

	4030 fraction = Bits(fraction, 50, 0) << 1;

	4031 }

	4032 }

	4033

	4034 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);

	4035

	4036 if (sizeof(T) == sizeof(float)) {

	4037 result_exp = 253 - exp;

	4038 } else {

	4039 result_exp = 2045 - exp;

	4040 }

	4041

	4042 double estimate = recip_estimate(scaled);

	4043

	4044 fraction = double_mantissa(estimate);

	4045 if (result_exp == 0) {

	4046 fraction = (UINT64_C(1) << 51) \| Bits(fraction, 51, 1);

	4047 } else if (result_exp == -1) {

	4048 fraction = (UINT64_C(1) << 50) \| Bits(fraction, 51, 2);

	4049 result_exp = 0;

	4050 }

	4051 if (sizeof(T) == sizeof(float)) {

	4052 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));

	4053 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));

	4054 return float_pack(sign, exp_bits, frac_bits);

	4055 } else {

	4056 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));

	4057 }

	4058 }

	4059 }

	4060

	4061 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,

	4062 const LogicVRegister& src, FPRounding round) {

	4063 dst.ClearForWrite(vform);

	4064 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4065 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4066 float input = src.Float<float>(i);

	4067 dst.SetFloat(i, FPRecipEstimate<float>(input, round));

	4068 }

	4069 } else {

	4070 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4072 double input = src.Float<double>(i);

	4073 dst.SetFloat(i, FPRecipEstimate<double>(input, round));

	4074 }

	4075 }

	4076 return dst;

	4077 }

	4078

	4079 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,

	4080 const LogicVRegister& src) {

	4081 dst.ClearForWrite(vform);

	4082 uint64_t operand;

	4083 uint32_t result;

	4084 double dp_operand, dp_result;

	4085 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4086 operand = src.Uint(vform, i);

	4087 if (operand <= 0x3FFFFFFF) {

	4088 result = 0xFFFFFFFF;

	4089 } else {

	4090 dp_operand = operand * std::pow(2.0, -32);

	4091 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);

	4092 result = static_cast<uint32_t>(dp_result);

	4093 }

	4094 dst.SetUint(vform, i, result);

	4095 }

	4096 return dst;

	4097 }

	4098

	4099 // Based on reference C function recip_estimate from ARM ARM.

	4100 double Simulator::recip_estimate(double a) {

	4101 int q, s;

	4102 double r;

	4103 q = static_cast<int>(a * 512.0);

	4104 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);

	4105 s = static_cast<int>(256.0 * r + 0.5);

	4106 return static_cast<double>(s) / 256.0;

	4107 }

	4108

	4109 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,

	4110 const LogicVRegister& src) {

	4111 dst.ClearForWrite(vform);

	4112 uint64_t operand;

	4113 uint32_t result;

	4114 double dp_operand, dp_result;

	4115 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4116 operand = src.Uint(vform, i);

	4117 if (operand <= 0x7FFFFFFF) {

	4118 result = 0xFFFFFFFF;

	4119 } else {

	4120 dp_operand = operand * std::pow(2.0, -32);

	4121 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);

	4122 result = static_cast<uint32_t>(dp_result);

	4123 }

	4124 dst.SetUint(vform, i, result);

	4125 }

	4126 return dst;

	4127 }

	4128

	4129 template <typename T>

	4130 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,

	4131 const LogicVRegister& src) {

	4132 dst.ClearForWrite(vform);

	4133 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4134 T op = src.Float<T>(i);

	4135 T result;

	4136 if (std::isnan(op)) {

	4137 result = FPProcessNaN(op);

	4138 } else {

	4139 int exp;

	4140 uint32_t sign;

	4141 if (sizeof(T) == sizeof(float)) {

	4142 sign = float_sign(op);

	4143 exp = static_cast<int>(float_exp(op));

	4144 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));

	4145 result = float_pack(sign, exp, 0);

	4146 } else {

	4147 sign = double_sign(op);

	4148 exp = static_cast<int>(double_exp(op));

	4149 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));

	4150 result = double_pack(sign, exp, 0);

	4151 }

	4152 }

	4153 dst.SetFloat(i, result);

	4154 }

	4155 return dst;

	4156 }

	4157

	4158 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,

	4159 const LogicVRegister& src) {

	4160 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4161 frecpx<float>(vform, dst, src);

	4162 } else {

	4163 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4164 frecpx<double>(vform, dst, src);

	4165 }

	4166 return dst;

	4167 }

	4168

	4169 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,

	4170 const LogicVRegister& src, int fbits,

	4171 FPRounding round) {

	4172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4173 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4174 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);

	4175 dst.SetFloat<float>(i, result);

	4176 } else {

	4177 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4178 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);

	4179 dst.SetFloat<double>(i, result);

	4180 }

	4181 }

	4182 return dst;

	4183 }

	4184

	4185 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,

	4186 const LogicVRegister& src, int fbits,

	4187 FPRounding round) {

	4188 for (int i = 0; i < LaneCountFromFormat(vform); i++) {

	4189 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {

	4190 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);

	4191 dst.SetFloat<float>(i, result);

	4192 } else {

	4193 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);

	4194 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);

	4195 dst.SetFloat<double>(i, result);

	4196 }

	4197 }

	4198 return dst;

	4199 }

	4200

	4201 #endif // USE_SIMULATOR

	4202

	4203 } // namespace internal

	4204 } // namespace v8

	4205

	4206 #endif // V8_TARGET_ARCH_ARM64

OLD	NEW

« src/arm64/instructions-arm64.h ('K') | « src/arm64/instrument-arm64.cc ('k') | src/arm64/macro-assembler-arm64.h » ('j') | src/arm64/simulator-arm64.cc » ('J')