src/arm64/simulator-arm64.h - Issue 2812573003: Reland "ARM64: Add NEON support"

Unified Diff: src/arm64/simulator-arm64.h

Issue 2812573003: Reland "ARM64: Add NEON support" (Closed)

Patch Set: Add trace directory to gitignore Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/arm64/simulator-arm64.h

diff --git a/src/arm64/simulator-arm64.h b/src/arm64/simulator-arm64.h

index 3016e616e4df8b8d1c41bba8df303f8a0c851125..918f0e96de31d5dc31d4503be1902874957adbae 100644

--- a/src/arm64/simulator-arm64.h

+++ b/src/arm64/simulator-arm64.h

@@ -70,6 +70,239 @@ class SimulatorStack : public v8::internal::AllStatic {

#else // !defined(USE_SIMULATOR)

+// Assemble the specified IEEE-754 components into the target type and apply

+// appropriate rounding.

+// sign: 0 = positive, 1 = negative

+// exponent: Unbiased IEEE-754 exponent.

+// mantissa: The mantissa of the input. The top bit (which is not encoded for

+// normal IEEE-754 values) must not be omitted. This bit has the

+// value 'pow(2, exponent)'.

+//

+// The input value is assumed to be a normalized value. That is, the input may

+// not be infinity or NaN. If the source value is subnormal, it must be

+// normalized before calling this function such that the highest set bit in the

+// mantissa has the value 'pow(2, exponent)'.

+//

+// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than

+// calling a templated FPRound.

+template <class T, int ebits, int mbits>

+T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,

+ FPRounding round_mode) {

+ static_assert((sizeof(T) * 8) >= (1 + ebits + mbits),

+ "destination type T not large enough");

+ static_assert(sizeof(T) <= sizeof(uint64_t),

+ "maximum size of destination type T is 64 bits");

+ static_assert(std::is_unsigned<T>::value,

+ "destination type T must be unsigned");

+ DCHECK((sign == 0) || (sign == 1));

+ // Only FPTieEven and FPRoundOdd rounding modes are implemented.

+ DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));

+ // Rounding can promote subnormals to normals, and normals to infinities. For

+ // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be

+ // encodable as a float, but rounding based on the low-order mantissa bits

+ // could make it overflow. With ties-to-even rounding, this value would become

+ // an infinity.

+ // ---- Rounding Method ----

+ //

+ // The exponent is irrelevant in the rounding operation, so we treat the

+ // lowest-order bit that will fit into the result ('onebit') as having

+ // the value '1'. Similarly, the highest-order bit that won't fit into

+ // the result ('halfbit') has the value '0.5'. The 'point' sits between

+ // 'onebit' and 'halfbit':

+ //

+ // These bits fit into the result.

+ // |---------------------|

+ // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

+ // ||

+ // / |

+ // / halfbit

+ // onebit

+ //

+ // For subnormal outputs, the range of representable bits is smaller and

+ // the position of onebit and halfbit depends on the exponent of the

+ // input, but the method is otherwise similar.

+ //

+ // onebit(frac)

+ // |

+ // | halfbit(frac) halfbit(adjusted)

+ // | / /

+ // | | |

+ // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00

+ // 0b00.0... -> 0b00.0... -> 0b00

+ // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00

+ // 0b00.1... -> 0b00.1... -> 0b01

+ // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01

+ // 0b01.0... -> 0b01.0... -> 0b01

+ // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10

+ // 0b01.1... -> 0b01.1... -> 0b10

+ // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10

+ // 0b10.0... -> 0b10.0... -> 0b10

+ // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10

+ // 0b10.1... -> 0b10.1... -> 0b11

+ // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11

+ // ... / | / |

+ // / | / |

+ // / |

+ // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |

+ //

+ // mantissa = (mantissa >> shift) + halfbit(adjusted);

+ const int mantissa_offset = 0;

+ const int exponent_offset = mantissa_offset + mbits;

+ const int sign_offset = exponent_offset + ebits;

+ DCHECK_EQ(sign_offset, static_cast<int>(sizeof(T) * 8 - 1));

+ // Bail out early for zero inputs.

+ if (mantissa == 0) {

+ return static_cast<T>(sign << sign_offset);

+ }

+ // If all bits in the exponent are set, the value is infinite or NaN.

+ // This is true for all binary IEEE-754 formats.

+ const int infinite_exponent = (1 << ebits) - 1;

+ const int max_normal_exponent = infinite_exponent - 1;

+ // Apply the exponent bias to encode it for the result. Doing this early makes

+ // it easy to detect values that will be infinite or subnormal.

+ exponent += max_normal_exponent >> 1;

+ if (exponent > max_normal_exponent) {

+ // Overflow: the input is too large for the result type to represent.

+ if (round_mode == FPTieEven) {

+ // FPTieEven rounding mode handles overflows using infinities.

+ exponent = infinite_exponent;

+ mantissa = 0;

+ } else {

+ DCHECK_EQ(round_mode, FPRoundOdd);

+ // FPRoundOdd rounding mode handles overflows using the largest magnitude

+ // normal number.

+ exponent = max_normal_exponent;

+ mantissa = (UINT64_C(1) << exponent_offset) - 1;

+ }

+ return static_cast<T>((sign << sign_offset) |

+ (exponent << exponent_offset) |

+ (mantissa << mantissa_offset));

+ }

+ // Calculate the shift required to move the top mantissa bit to the proper

+ // place in the destination type.

+ const int highest_significant_bit = 63 - CountLeadingZeros(mantissa, 64);

+ int shift = highest_significant_bit - mbits;

+ if (exponent <= 0) {

+ // The output will be subnormal (before rounding).

+ // For subnormal outputs, the shift must be adjusted by the exponent. The +1

+ // is necessary because the exponent of a subnormal value (encoded as 0) is

+ // the same as the exponent of the smallest normal value (encoded as 1).

+ shift += -exponent + 1;

+ // Handle inputs that would produce a zero output.

+ //

+ // Shifts higher than highest_significant_bit+1 will always produce a zero

+ // result. A shift of exactly highest_significant_bit+1 might produce a

+ // non-zero result after rounding.

+ if (shift > (highest_significant_bit + 1)) {

+ if (round_mode == FPTieEven) {

+ // The result will always be +/-0.0.

+ return static_cast<T>(sign << sign_offset);

+ } else {

+ DCHECK_EQ(round_mode, FPRoundOdd);

+ DCHECK_NE(mantissa, 0U);

+ // For FPRoundOdd, if the mantissa is too small to represent and

+ // non-zero return the next "odd" value.

+ return static_cast<T>((sign << sign_offset) | 1);

+ }

+ // Properly encode the exponent for a subnormal output.

+ exponent = 0;

+ } else {

+ // Clear the topmost mantissa bit, since this is not encoded in IEEE-754

+ // normal values.

+ mantissa &= ~(UINT64_C(1) << highest_significant_bit);

+ }

+ if (shift > 0) {

+ if (round_mode == FPTieEven) {

+ // We have to shift the mantissa to the right. Some precision is lost, so

+ // we need to apply rounding.

+ uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;

+ uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;

+ uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);

+ uint64_t adjusted = mantissa - adjustment;

+ T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;

+ T result =

+ static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |

+ ((mantissa >> shift) << mantissa_offset));

+ // A very large mantissa can overflow during rounding. If this happens,

+ // the exponent should be incremented and the mantissa set to 1.0

+ // (encoded as 0). Applying halfbit_adjusted after assembling the float

+ // has the nice side-effect that this case is handled for free.

+ //

+ // This also handles cases where a very large finite value overflows to

+ // infinity, or where a very large subnormal value overflows to become

+ // normal.

+ return result + halfbit_adjusted;

+ } else {

+ DCHECK_EQ(round_mode, FPRoundOdd);

+ // If any bits at position halfbit or below are set, onebit (ie. the

+ // bottom bit of the resulting mantissa) must be set.

+ uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);

+ if (fractional_bits != 0) {

+ mantissa |= UINT64_C(1) << shift;

+ }

+ return static_cast<T>((sign << sign_offset) |

+ (exponent << exponent_offset) |

+ ((mantissa >> shift) << mantissa_offset));

+ }

+ } else {

+ // We have to shift the mantissa to the left (or not at all). The input

+ // mantissa is exactly representable in the output mantissa, so apply no

+ // rounding correction.

+ return static_cast<T>((sign << sign_offset) |

+ (exponent << exponent_offset) |

+ ((mantissa << -shift) << mantissa_offset));

+ }

+// Representation of memory, with typed getters and setters for access.

+class SimMemory {

+ public:

+ template <typename T>

+ static T AddressUntag(T address) {

+ // Cast the address using a C-style cast. A reinterpret_cast would be

+ // appropriate, but it can't cast one integral type to another.

+ uint64_t bits = (uint64_t)address;

+ return (T)(bits & ~kAddressTagMask);

+ }

+ template <typename T, typename A>

+ static T Read(A address) {

+ T value;

+ address = AddressUntag(address);

+ DCHECK((sizeof(value) == 1) || (sizeof(value) == 2) ||

+ (sizeof(value) == 4) || (sizeof(value) == 8) ||

+ (sizeof(value) == 16));

+ memcpy(&value, reinterpret_cast<const char*>(address), sizeof(value));

+ return value;

+ }

+ template <typename T, typename A>

+ static void Write(A address, T value) {

+ address = AddressUntag(address);

+ DCHECK((sizeof(value) == 1) || (sizeof(value) == 2) ||

+ (sizeof(value) == 4) || (sizeof(value) == 8) ||

+ (sizeof(value) == 16));

+ memcpy(reinterpret_cast<char*>(address), &value, sizeof(value));

+ }

+};

// The proper way to initialize a simulated system register (such as NZCV) is as

// follows:

@@ -125,29 +358,330 @@ class SimSystemRegister {

// Represent a register (r0-r31, v0-v31).

+template <int kSizeInBytes>

class SimRegisterBase {

public:

template<typename T>

void Set(T new_value) {

- value_ = 0;

+ static_assert(sizeof(new_value) <= kSizeInBytes,

+ "Size of new_value must be <= size of template type.");

+ if (sizeof(new_value) < kSizeInBytes) {

+ // All AArch64 registers are zero-extending.

+ memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));

+ }

memcpy(&value_, &new_value, sizeof(T));

+ NotifyRegisterWrite();

}

- template<typename T>

- T Get() const {

+ // Insert a typed value into a register, leaving the rest of the register

+ // unchanged. The lane parameter indicates where in the register the value

+ // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where

+ // 0 represents the least significant bits.

+ template <typename T>

+ void Insert(int lane, T new_value) {

+ DCHECK_GE(lane, 0);

+ DCHECK_LE(sizeof(new_value) + (lane * sizeof(new_value)),

+ static_cast<unsigned>(kSizeInBytes));

+ memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value));

+ NotifyRegisterWrite();

+ }

+ template <typename T>

+ T Get(int lane = 0) const {

T result;

- memcpy(&result, &value_, sizeof(T));

+ DCHECK_GE(lane, 0);

+ DCHECK_LE(sizeof(result) + (lane * sizeof(result)),

+ static_cast<unsigned>(kSizeInBytes));

+ memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));

return result;

}

+ // TODO(all): Make this return a map of updated bytes, so that we can

+ // highlight updated lanes for load-and-insert. (That never happens for scalar

+ // code, but NEON has some instructions that can update individual lanes.)

+ bool WrittenSinceLastLog() const { return written_since_last_log_; }

+ void NotifyRegisterLogged() { written_since_last_log_ = false; }

protected:

- int64_t value_;

+ uint8_t value_[kSizeInBytes];

+ // Helpers to aid with register tracing.

+ bool written_since_last_log_;

+ void NotifyRegisterWrite() { written_since_last_log_ = true; }

};

+typedef SimRegisterBase<kXRegSize> SimRegister; // r0-r31

+typedef SimRegisterBase<kQRegSize> SimVRegister; // v0-v31

+// Representation of a vector register, with typed getters and setters for lanes

+// and additional information to represent lane state.

+class LogicVRegister {

+ public:

+ inline LogicVRegister(SimVRegister& other) // NOLINT

+ : register_(other) {

+ for (unsigned i = 0; i < arraysize(saturated_); i++) {

+ saturated_[i] = kNotSaturated;

+ }

+ for (unsigned i = 0; i < arraysize(round_); i++) {

+ round_[i] = false;

+ }

+ int64_t Int(VectorFormat vform, int index) const {

+ int64_t element;

+ switch (LaneSizeInBitsFromFormat(vform)) {

+ case 8:

+ element = register_.Get<int8_t>(index);

+ break;

+ case 16:

+ element = register_.Get<int16_t>(index);

+ break;

+ case 32:

+ element = register_.Get<int32_t>(index);

+ break;

+ case 64:

+ element = register_.Get<int64_t>(index);

+ break;

+ default:

+ UNREACHABLE();

+ return 0;

+ }

+ return element;

+ }

+ uint64_t Uint(VectorFormat vform, int index) const {

+ uint64_t element;

+ switch (LaneSizeInBitsFromFormat(vform)) {

+ case 8:

+ element = register_.Get<uint8_t>(index);

+ break;

+ case 16:

+ element = register_.Get<uint16_t>(index);

+ break;

+ case 32:

+ element = register_.Get<uint32_t>(index);

+ break;

+ case 64:

+ element = register_.Get<uint64_t>(index);

+ break;

+ default:

+ UNREACHABLE();

+ return 0;

+ }

+ return element;

+ }

+ uint64_t UintLeftJustified(VectorFormat vform, int index) const {

+ return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));

+ }

+ int64_t IntLeftJustified(VectorFormat vform, int index) const {

+ uint64_t value = UintLeftJustified(vform, index);

+ int64_t result;

+ memcpy(&result, &value, sizeof(result));

+ return result;

+ }

+ void SetInt(VectorFormat vform, int index, int64_t value) const {

+ switch (LaneSizeInBitsFromFormat(vform)) {

+ case 8:

+ register_.Insert(index, static_cast<int8_t>(value));

+ break;

+ case 16:

+ register_.Insert(index, static_cast<int16_t>(value));

+ break;

+ case 32:

+ register_.Insert(index, static_cast<int32_t>(value));

+ break;

+ case 64:

+ register_.Insert(index, static_cast<int64_t>(value));

+ break;

+ default:

+ UNREACHABLE();

+ return;

+ }

+ void SetIntArray(VectorFormat vform, const int64_t* src) const {

+ ClearForWrite(vform);

+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {

+ SetInt(vform, i, src[i]);

+ }

+ void SetUint(VectorFormat vform, int index, uint64_t value) const {

+ switch (LaneSizeInBitsFromFormat(vform)) {

+ case 8:

+ register_.Insert(index, static_cast<uint8_t>(value));

+ break;

+ case 16:

+ register_.Insert(index, static_cast<uint16_t>(value));

+ break;

+ case 32:

+ register_.Insert(index, static_cast<uint32_t>(value));

+ break;

+ case 64:

+ register_.Insert(index, static_cast<uint64_t>(value));

+ break;

+ default:

+ UNREACHABLE();

+ return;

+ }

+ void SetUintArray(VectorFormat vform, const uint64_t* src) const {

+ ClearForWrite(vform);

+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {

+ SetUint(vform, i, src[i]);

+ }

+ void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const;

+ void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const;

+ template <typename T>

+ T Float(int index) const {

+ return register_.Get<T>(index);

+ }

+ template <typename T>

+ void SetFloat(int index, T value) const {

+ register_.Insert(index, value);

+ }

+ // When setting a result in a register of size less than Q, the top bits of

+ // the Q register must be cleared.

+ void ClearForWrite(VectorFormat vform) const {

+ unsigned size = RegisterSizeInBytesFromFormat(vform);

+ for (unsigned i = size; i < kQRegSize; i++) {

+ SetUint(kFormat16B, i, 0);

+ }

-typedef SimRegisterBase SimRegister; // r0-r31

-typedef SimRegisterBase SimFPRegister; // v0-v31

+ // Saturation state for each lane of a vector.

+ enum Saturation {

+ kNotSaturated = 0,

+ kSignedSatPositive = 1 << 0,

+ kSignedSatNegative = 1 << 1,

+ kSignedSatMask = kSignedSatPositive | kSignedSatNegative,

+ kSignedSatUndefined = kSignedSatMask,

+ kUnsignedSatPositive = 1 << 2,

+ kUnsignedSatNegative = 1 << 3,

+ kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,

+ kUnsignedSatUndefined = kUnsignedSatMask

+ };

+ // Getters for saturation state.

+ Saturation GetSignedSaturation(int index) {

+ return static_cast<Saturation>(saturated_[index] & kSignedSatMask);

+ }

+ Saturation GetUnsignedSaturation(int index) {

+ return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);

+ }

+ // Setters for saturation state.

+ void ClearSat(int index) { saturated_[index] = kNotSaturated; }

+ void SetSignedSat(int index, bool positive) {

+ SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);

+ }

+ void SetUnsignedSat(int index, bool positive) {

+ SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);

+ }

+ void SetSatFlag(int index, Saturation sat) {

+ saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);

+ DCHECK_NE(sat & kUnsignedSatMask, kUnsignedSatUndefined);

+ DCHECK_NE(sat & kSignedSatMask, kSignedSatUndefined);

+ }

+ // Saturate lanes of a vector based on saturation state.

+ LogicVRegister& SignedSaturate(VectorFormat vform) {

+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {

+ Saturation sat = GetSignedSaturation(i);

+ if (sat == kSignedSatPositive) {

+ SetInt(vform, i, MaxIntFromFormat(vform));

+ } else if (sat == kSignedSatNegative) {

+ SetInt(vform, i, MinIntFromFormat(vform));

+ }

+ return *this;

+ }

+ LogicVRegister& UnsignedSaturate(VectorFormat vform) {

+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {

+ Saturation sat = GetUnsignedSaturation(i);

+ if (sat == kUnsignedSatPositive) {

+ SetUint(vform, i, MaxUintFromFormat(vform));

+ } else if (sat == kUnsignedSatNegative) {

+ SetUint(vform, i, 0);

+ }

+ return *this;

+ }

+ // Getter for rounding state.

+ bool GetRounding(int index) { return round_[index]; }

+ // Setter for rounding state.

+ void SetRounding(int index, bool round) { round_[index] = round; }

+ // Round lanes of a vector based on rounding state.

+ LogicVRegister& Round(VectorFormat vform) {

+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {

+ SetUint(vform, i, Uint(vform, i) + (GetRounding(i) ? 1 : 0));

+ }

+ return *this;

+ }

+ // Unsigned halve lanes of a vector, and use the saturation state to set the

+ // top bit.

+ LogicVRegister& Uhalve(VectorFormat vform) {

+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {

+ uint64_t val = Uint(vform, i);

+ SetRounding(i, (val & 1) == 1);

+ val >>= 1;

+ if (GetUnsignedSaturation(i) != kNotSaturated) {

+ // If the operation causes unsigned saturation, the bit shifted into the

+ // most significant bit must be set.

+ val |= (MaxUintFromFormat(vform) >> 1) + 1;

+ }

+ SetInt(vform, i, val);

+ }

+ return *this;

+ }

+ // Signed halve lanes of a vector, and use the carry state to set the top bit.

+ LogicVRegister& Halve(VectorFormat vform) {

+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {

+ int64_t val = Int(vform, i);

+ SetRounding(i, (val & 1) == 1);

+ val >>= 1;

+ if (GetSignedSaturation(i) != kNotSaturated) {

+ // If the operation causes signed saturation, the sign bit must be

+ // inverted.

+ val ^= (MaxUintFromFormat(vform) >> 1) + 1;

+ }

+ SetInt(vform, i, val);

+ }

+ return *this;

+ }

+ private:

+ SimVRegister& register_;

+ // Allocate one saturation state entry per lane; largest register is type Q,

+ // and lanes can be a minimum of one byte wide.

+ Saturation saturated_[kQRegSize];

+ // Allocate one rounding state entry per lane.

+ bool round_[kQRegSize];

+};

class Simulator : public DecoderVisitor {

public:

@@ -315,6 +849,7 @@ class Simulator : public DecoderVisitor {

CheckBreakNext();

Decode(pc_);

increment_pc();

+ LogAllWrittenRegisters();

CheckBreakpoints();

}

@@ -333,7 +868,7 @@ class Simulator : public DecoderVisitor {

template<typename T>

T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {

- DCHECK(code < kNumberOfRegisters);

+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfRegisters));

if (IsZeroRegister(code, r31mode)) {

return 0;

}

@@ -349,6 +884,8 @@ class Simulator : public DecoderVisitor {

return reg<int64_t>(code, r31mode);

}

+ enum RegLogMode { LogRegWrites, NoRegLog };

// Write 'value' into an integer register. The value is zero-extended. This

// behaviour matches AArch64 register writes.

template<typename T>

@@ -373,7 +910,7 @@ class Simulator : public DecoderVisitor {

template <typename T>

void set_reg_no_log(unsigned code, T value,

Reg31Mode r31mode = Reg31IsZeroRegister) {

- DCHECK(code < kNumberOfRegisters);

+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfRegisters));

if (!IsZeroRegister(code, r31mode)) {

registers_[code].Set(value);

}

@@ -392,16 +929,39 @@ class Simulator : public DecoderVisitor {

// Commonly-used special cases.

template<typename T>

void set_lr(T value) {

- DCHECK(sizeof(T) == kPointerSize);

+ DCHECK_EQ(sizeof(T), static_cast<unsigned>(kPointerSize));

set_reg(kLinkRegCode, value);

}

template<typename T>

void set_sp(T value) {

- DCHECK(sizeof(T) == kPointerSize);

+ DCHECK_EQ(sizeof(T), static_cast<unsigned>(kPointerSize));

set_reg(31, value, Reg31IsStackPointer);

}

+ // Vector register accessors.

+ // These are equivalent to the integer register accessors, but for vector

+ // registers.

+ // A structure for representing a 128-bit Q register.

+ struct qreg_t {

+ uint8_t val[kQRegSize];

+ };

+ // Basic accessor: read the register as the specified type.

+ template <typename T>

+ T vreg(unsigned code) const {

+ static_assert((sizeof(T) == kBRegSize) || (sizeof(T) == kHRegSize) ||

+ (sizeof(T) == kSRegSize) || (sizeof(T) == kDRegSize) ||

+ (sizeof(T) == kQRegSize),

+ "Template type must match size of register.");

+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters));

+ return vregisters_[code].Get<T>();

+ }

+ inline SimVRegister& vreg(unsigned code) { return vregisters_[code]; }

int64_t sp() { return xreg(31, Reg31IsStackPointer); }

int64_t jssp() { return xreg(kJSSPCode, Reg31IsStackPointer); }

int64_t fp() {

@@ -411,87 +971,135 @@ class Simulator : public DecoderVisitor {

Address get_sp() const { return reg<Address>(31, Reg31IsStackPointer); }

- template<typename T>

- T fpreg(unsigned code) const {

- DCHECK(code < kNumberOfRegisters);

- return fpregisters_[code].Get<T>();

- }

+ // Common specialized accessors for the vreg() template.

+ uint8_t breg(unsigned code) const { return vreg<uint8_t>(code); }

- // Common specialized accessors for the fpreg() template.

- float sreg(unsigned code) const {

- return fpreg<float>(code);

- }

+ float hreg(unsigned code) const { return vreg<uint16_t>(code); }

- uint32_t sreg_bits(unsigned code) const {

- return fpreg<uint32_t>(code);

- }

+ float sreg(unsigned code) const { return vreg<float>(code); }

- double dreg(unsigned code) const {

- return fpreg<double>(code);

- }

+ uint32_t sreg_bits(unsigned code) const { return vreg<uint32_t>(code); }

- uint64_t dreg_bits(unsigned code) const {

- return fpreg<uint64_t>(code);

- }

+ double dreg(unsigned code) const { return vreg<double>(code); }

+ uint64_t dreg_bits(unsigned code) const { return vreg<uint64_t>(code); }

+ qreg_t qreg(unsigned code) const { return vreg<qreg_t>(code); }

+ // As above, with parameterized size and return type. The value is

+ // either zero-extended or truncated to fit, as required.

+ template <typename T>

+ T vreg(unsigned size, unsigned code) const {

+ uint64_t raw = 0;

+ T result;

- double fpreg(unsigned size, unsigned code) const {

switch (size) {

- case kSRegSizeInBits: return sreg(code);

- case kDRegSizeInBits: return dreg(code);

+ case kSRegSize:

+ raw = vreg<uint32_t>(code);

+ break;

+ case kDRegSize:

+ raw = vreg<uint64_t>(code);

+ break;

default:

UNREACHABLE();

- return 0.0;

+ break;

}

+ static_assert(sizeof(result) <= sizeof(raw),

+ "Template type must be <= 64 bits.");

+ // Copy the result and truncate to fit. This assumes a little-endian host.

+ memcpy(&result, &raw, sizeof(result));

+ return result;

}

// Write 'value' into a floating-point register. The value is zero-extended.

// This behaviour matches AArch64 register writes.

- template<typename T>

- void set_fpreg(unsigned code, T value) {

- set_fpreg_no_log(code, value);

- if (sizeof(value) <= kSRegSize) {

- LogFPRegister(code, kPrintSRegValue);

- } else {

- LogFPRegister(code, kPrintDRegValue);

+ template <typename T>

+ void set_vreg(unsigned code, T value, RegLogMode log_mode = LogRegWrites) {

+ static_assert(

+ (sizeof(value) == kBRegSize) || (sizeof(value) == kHRegSize) ||

+ (sizeof(value) == kSRegSize) || (sizeof(value) == kDRegSize) ||

+ (sizeof(value) == kQRegSize),

+ "Template type must match size of register.");

+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters));

+ vregisters_[code].Set(value);

+ if (log_mode == LogRegWrites) {

+ LogVRegister(code, GetPrintRegisterFormat(value));

}

- // Common specialized accessors for the set_fpreg() template.

- void set_sreg(unsigned code, float value) {

- set_fpreg(code, value);

+ // Common specialized accessors for the set_vreg() template.

+ void set_breg(unsigned code, int8_t value,

+ RegLogMode log_mode = LogRegWrites) {

+ set_vreg(code, value, log_mode);

+ }

+ void set_hreg(unsigned code, int16_t value,

+ RegLogMode log_mode = LogRegWrites) {

+ set_vreg(code, value, log_mode);

+ }

+ void set_sreg(unsigned code, float value,

+ RegLogMode log_mode = LogRegWrites) {

+ set_vreg(code, value, log_mode);

+ }

+ void set_sreg_bits(unsigned code, uint32_t value,

+ RegLogMode log_mode = LogRegWrites) {

+ set_vreg(code, value, log_mode);

}

- void set_sreg_bits(unsigned code, uint32_t value) {

- set_fpreg(code, value);

+ void set_dreg(unsigned code, double value,

+ RegLogMode log_mode = LogRegWrites) {

+ set_vreg(code, value, log_mode);

}

- void set_dreg(unsigned code, double value) {

- set_fpreg(code, value);

+ void set_dreg_bits(unsigned code, uint64_t value,

+ RegLogMode log_mode = LogRegWrites) {

+ set_vreg(code, value, log_mode);

}

- void set_dreg_bits(unsigned code, uint64_t value) {

- set_fpreg(code, value);

+ void set_qreg(unsigned code, qreg_t value,

+ RegLogMode log_mode = LogRegWrites) {

+ set_vreg(code, value, log_mode);

}

// As above, but don't automatically log the register update.

template <typename T>

- void set_fpreg_no_log(unsigned code, T value) {

- DCHECK((sizeof(value) == kDRegSize) || (sizeof(value) == kSRegSize));

- DCHECK(code < kNumberOfFPRegisters);

- fpregisters_[code].Set(value);

+ void set_vreg_no_log(unsigned code, T value) {

+ STATIC_ASSERT((sizeof(value) == kBRegSize) ||

+ (sizeof(value) == kHRegSize) ||

+ (sizeof(value) == kSRegSize) ||

+ (sizeof(value) == kDRegSize) || (sizeof(value) == kQRegSize));

+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters));

+ vregisters_[code].Set(value);

+ }

+ void set_breg_no_log(unsigned code, uint8_t value) {

+ set_vreg_no_log(code, value);

+ }

+ void set_hreg_no_log(unsigned code, uint16_t value) {

+ set_vreg_no_log(code, value);

}

void set_sreg_no_log(unsigned code, float value) {

- set_fpreg_no_log(code, value);

+ set_vreg_no_log(code, value);

}

void set_dreg_no_log(unsigned code, double value) {

- set_fpreg_no_log(code, value);

+ set_vreg_no_log(code, value);

+ }

+ void set_qreg_no_log(unsigned code, qreg_t value) {

+ set_vreg_no_log(code, value);

}

SimSystemRegister& nzcv() { return nzcv_; }

SimSystemRegister& fpcr() { return fpcr_; }

+ FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); }

+ bool DN() { return fpcr_.DN() != 0; }

// Debug helpers

@@ -518,66 +1126,195 @@ class Simulator : public DecoderVisitor {

// Print all registers of the specified types.

void PrintRegisters();

- void PrintFPRegisters();

+ void PrintVRegisters();

void PrintSystemRegisters();

- // Like Print* (above), but respect log_parameters().

- void LogSystemRegisters() {

- if (log_parameters() & LOG_SYS_REGS) PrintSystemRegisters();

+ // As above, but only print the registers that have been updated.

+ void PrintWrittenRegisters();

+ void PrintWrittenVRegisters();

+ // As above, but respect LOG_REG and LOG_VREG.

+ void LogWrittenRegisters() {

+ if (log_parameters() & LOG_REGS) PrintWrittenRegisters();

+ }

+ void LogWrittenVRegisters() {

+ if (log_parameters() & LOG_VREGS) PrintWrittenVRegisters();

+ }

+ void LogAllWrittenRegisters() {

+ LogWrittenRegisters();

+ LogWrittenVRegisters();

+ }

+ // Specify relevant register formats for Print(V)Register and related helpers.

+ enum PrintRegisterFormat {

+ // The lane size.

+ kPrintRegLaneSizeB = 0 << 0,

+ kPrintRegLaneSizeH = 1 << 0,

+ kPrintRegLaneSizeS = 2 << 0,

+ kPrintRegLaneSizeW = kPrintRegLaneSizeS,

+ kPrintRegLaneSizeD = 3 << 0,

+ kPrintRegLaneSizeX = kPrintRegLaneSizeD,

+ kPrintRegLaneSizeQ = 4 << 0,

+ kPrintRegLaneSizeOffset = 0,

+ kPrintRegLaneSizeMask = 7 << 0,

+ // The lane count.

+ kPrintRegAsScalar = 0,

+ kPrintRegAsDVector = 1 << 3,

+ kPrintRegAsQVector = 2 << 3,

+ kPrintRegAsVectorMask = 3 << 3,

+ // Indicate floating-point format lanes. (This flag is only supported for S-

+ // and D-sized lanes.)

+ kPrintRegAsFP = 1 << 5,

+ // Supported combinations.

+ kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,

+ kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,

+ kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,

+ kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,

+ kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,

+ kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,

+ kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,

+ kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,

+ kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,

+ kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,

+ kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,

+ kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,

+ kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,

+ kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,

+ kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,

+ kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,

+ kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,

+ kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,

+ kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,

+ kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,

+ kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar

+ };

+ unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {

+ return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;

}

- void LogRegisters() {

- if (log_parameters() & LOG_REGS) PrintRegisters();

+ unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {

+ return 1 << GetPrintRegLaneSizeInBytesLog2(format);

}

- void LogFPRegisters() {

- if (log_parameters() & LOG_FP_REGS) PrintFPRegisters();

+ unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {

+ if (format & kPrintRegAsDVector) return kDRegSizeLog2;

+ if (format & kPrintRegAsQVector) return kQRegSizeLog2;

+ // Scalar types.

+ return GetPrintRegLaneSizeInBytesLog2(format);

}

- // Specify relevant register sizes, for PrintFPRegister.

- //

- // These values are bit masks; they can be combined in case multiple views of

- // a machine register are interesting.

- enum PrintFPRegisterSizes {

- kPrintDRegValue = 1 << kDRegSize,

- kPrintSRegValue = 1 << kSRegSize,

- kPrintAllFPRegValues = kPrintDRegValue | kPrintSRegValue

- };

+ unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {

+ return 1 << GetPrintRegSizeInBytesLog2(format);

+ }

+ unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {

+ unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);

+ unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);

+ DCHECK_GE(reg_size_log2, lane_size_log2);

+ return 1 << (reg_size_log2 - lane_size_log2);

+ }

+ template <typename T>

+ PrintRegisterFormat GetPrintRegisterFormat(T value) {

+ return GetPrintRegisterFormatForSize(sizeof(value));

+ }

+ PrintRegisterFormat GetPrintRegisterFormat(double value) {

+ static_assert(sizeof(value) == kDRegSize,

+ "D register must be size of double.");

+ return GetPrintRegisterFormatForSizeFP(sizeof(value));

+ }

+ PrintRegisterFormat GetPrintRegisterFormat(float value) {

+ static_assert(sizeof(value) == kSRegSize,

+ "S register must be size of float.");

+ return GetPrintRegisterFormatForSizeFP(sizeof(value));

+ }

+ PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);

+ PrintRegisterFormat GetPrintRegisterFormatFP(VectorFormat vform);

+ PrintRegisterFormat GetPrintRegisterFormatForSize(size_t reg_size,

+ size_t lane_size);

+ PrintRegisterFormat GetPrintRegisterFormatForSize(size_t size) {

+ return GetPrintRegisterFormatForSize(size, size);

+ }

+ PrintRegisterFormat GetPrintRegisterFormatForSizeFP(size_t size) {

+ switch (size) {

+ default:

+ UNREACHABLE();

+ case kDRegSize:

+ return kPrintDReg;

+ case kSRegSize:

+ return kPrintSReg;

+ }

+ PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {

+ if ((GetPrintRegLaneSizeInBytes(format) == kSRegSize) ||

+ (GetPrintRegLaneSizeInBytes(format) == kDRegSize)) {

+ return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);

+ }

+ return format;

+ }

// Print individual register values (after update).

void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);

- void PrintFPRegister(unsigned code,

- PrintFPRegisterSizes sizes = kPrintAllFPRegValues);

+ void PrintVRegister(unsigned code, PrintRegisterFormat sizes);

void PrintSystemRegister(SystemRegister id);

// Like Print* (above), but respect log_parameters().

void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {

if (log_parameters() & LOG_REGS) PrintRegister(code, r31mode);

}

- void LogFPRegister(unsigned code,

- PrintFPRegisterSizes sizes = kPrintAllFPRegValues) {

- if (log_parameters() & LOG_FP_REGS) PrintFPRegister(code, sizes);

+ void LogVRegister(unsigned code, PrintRegisterFormat format) {

+ if (log_parameters() & LOG_VREGS) PrintVRegister(code, format);

}

void LogSystemRegister(SystemRegister id) {

if (log_parameters() & LOG_SYS_REGS) PrintSystemRegister(id);

}

// Print memory accesses.

- void PrintRead(uintptr_t address, size_t size, unsigned reg_code);

- void PrintReadFP(uintptr_t address, size_t size, unsigned reg_code);

- void PrintWrite(uintptr_t address, size_t size, unsigned reg_code);

- void PrintWriteFP(uintptr_t address, size_t size, unsigned reg_code);

+ void PrintRead(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format);

+ void PrintWrite(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format);

+ void PrintVRead(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format, unsigned lane);

+ void PrintVWrite(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format, unsigned lane);

// Like Print* (above), but respect log_parameters().

- void LogRead(uintptr_t address, size_t size, unsigned reg_code) {

- if (log_parameters() & LOG_REGS) PrintRead(address, size, reg_code);

- }

- void LogReadFP(uintptr_t address, size_t size, unsigned reg_code) {

- if (log_parameters() & LOG_FP_REGS) PrintReadFP(address, size, reg_code);

- }

- void LogWrite(uintptr_t address, size_t size, unsigned reg_code) {

- if (log_parameters() & LOG_WRITE) PrintWrite(address, size, reg_code);

+ void LogRead(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format) {

+ if (log_parameters() & LOG_REGS) PrintRead(address, reg_code, format);

+ }

+ void LogWrite(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format) {

+ if (log_parameters() & LOG_WRITE) PrintWrite(address, reg_code, format);

+ }

+ void LogVRead(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format, unsigned lane = 0) {

+ if (log_parameters() & LOG_VREGS) {

+ PrintVRead(address, reg_code, format, lane);

+ }

}

- void LogWriteFP(uintptr_t address, size_t size, unsigned reg_code) {

- if (log_parameters() & LOG_WRITE) PrintWriteFP(address, size, reg_code);

+ void LogVWrite(uintptr_t address, unsigned reg_code,

+ PrintRegisterFormat format, unsigned lane = 0) {

+ if (log_parameters() & LOG_WRITE) {

+ PrintVWrite(address, reg_code, format, lane);

+ }

}

int log_parameters() { return log_parameters_; }

@@ -596,6 +1333,14 @@ class Simulator : public DecoderVisitor {

}

+ // Helper functions for register tracing.

+ void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,

+ int size_in_bytes = kXRegSize);

+ void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSize,

+ int lsb = 0);

+ void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes,

+ int lane_count = 1, int rightmost_lane = 0);

static inline const char* WRegNameForCode(unsigned code,

Reg31Mode mode = Reg31IsZeroRegister);

static inline const char* XRegNameForCode(unsigned code,

@@ -670,6 +1415,10 @@ class Simulator : public DecoderVisitor {

void LoadStoreWriteBack(unsigned addr_reg,

int64_t offset,

AddrMode addrmode);

+ void NEONLoadStoreMultiStructHelper(const Instruction* instr,

+ AddrMode addr_mode);

+ void NEONLoadStoreSingleStructHelper(const Instruction* instr,

+ AddrMode addr_mode);

void CheckMemoryAccess(uintptr_t address, uintptr_t stack);

// Memory read helpers.

@@ -677,7 +1426,8 @@ class Simulator : public DecoderVisitor {

T MemoryRead(A address) {

T value;

STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||

- (sizeof(value) == 4) || (sizeof(value) == 8));

+ (sizeof(value) == 4) || (sizeof(value) == 8) ||

+ (sizeof(value) == 16));

memcpy(&value, reinterpret_cast<const void*>(address), sizeof(value));

return value;

}

@@ -686,7 +1436,8 @@ class Simulator : public DecoderVisitor {

template <typename T, typename A>

void MemoryWrite(A address, T value) {

STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||

- (sizeof(value) == 4) || (sizeof(value) == 8));

+ (sizeof(value) == 4) || (sizeof(value) == 8) ||

+ (sizeof(value) == 16));

memcpy(reinterpret_cast<void*>(address), &value, sizeof(value));

}

@@ -704,14 +1455,652 @@ class Simulator : public DecoderVisitor {

void DataProcessing2Source(Instruction* instr);

template <typename T>

void BitfieldHelper(Instruction* instr);

+ uint16_t PolynomialMult(uint8_t op1, uint8_t op2);

+ void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);

+ void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);

+ void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);

+ void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ uint64_t addr);

+ void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ int index, uint64_t addr);

+ void ld2r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ uint64_t addr);

+ void ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ LogicVRegister dst3, uint64_t addr);

+ void ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ LogicVRegister dst3, int index, uint64_t addr);

+ void ld3r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ LogicVRegister dst3, uint64_t addr);

+ void ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ LogicVRegister dst3, LogicVRegister dst4, uint64_t addr);

+ void ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ LogicVRegister dst3, LogicVRegister dst4, int index, uint64_t addr);

+ void ld4r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,

+ LogicVRegister dst3, LogicVRegister dst4, uint64_t addr);

+ void st1(VectorFormat vform, LogicVRegister src, uint64_t addr);

+ void st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);

+ void st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2,

+ uint64_t addr);

+ void st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2,

+ int index, uint64_t addr);

+ void st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2,

+ LogicVRegister src3, uint64_t addr);

+ void st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2,

+ LogicVRegister src3, int index, uint64_t addr);

+ void st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2,

+ LogicVRegister src3, LogicVRegister src4, uint64_t addr);

+ void st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2,

+ LogicVRegister src3, LogicVRegister src4, int index, uint64_t addr);

+ LogicVRegister cmp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ Condition cond);

+ LogicVRegister cmp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, int imm, Condition cond);

+ LogicVRegister cmptst(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister add(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister addp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister mla(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister mls(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister mul(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister mul(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister mla(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister mls(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister pmul(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform,

+ LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2,

+ int index);

+ LogicVRegister fmul(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister fmla(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister fmls(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister fmulx(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister smull(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister smull2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister umull(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister umull2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister smlal(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister smlal2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister umlal(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister umlal2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister smlsl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister smlsl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister umlsl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister umlsl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister sqdmull(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister sqdmull2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2, int index);

+ LogicVRegister sqdmlal(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister sqdmlal2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2, int index);

+ LogicVRegister sqdmlsl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister sqdmlsl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2, int index);

+ LogicVRegister sqdmulh(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister sqrdmulh(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2, int index);

+ LogicVRegister sub(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister and_(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister orr(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister orn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister eor(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister bic(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister bic(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, uint64_t imm);

+ LogicVRegister bif(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister bit(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister bsl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister cls(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister clz(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister cnt(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister not_(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister rbit(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister rev(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int revSize);

+ LogicVRegister rev16(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister rev32(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister rev64(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister addlp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, bool is_signed,

+ bool do_accumulate);

+ LogicVRegister saddlp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister uaddlp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister sadalp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister uadalp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister ext(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ int index);

+ LogicVRegister ins_element(VectorFormat vform, LogicVRegister dst,

+ int dst_index, const LogicVRegister& src,

+ int src_index);

+ LogicVRegister ins_immediate(VectorFormat vform, LogicVRegister dst,

+ int dst_index, uint64_t imm);

+ LogicVRegister dup_element(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int src_index);

+ LogicVRegister dup_immediate(VectorFormat vform, LogicVRegister dst,

+ uint64_t imm);

+ LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm);

+ LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm);

+ LogicVRegister orr(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, uint64_t imm);

+ LogicVRegister sshl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister ushl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister SMinMax(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ bool max);

+ LogicVRegister smax(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister smin(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister SMinMaxP(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2, bool max);

+ LogicVRegister smaxp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister sminp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister addp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister addv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister uaddlv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister saddlv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister SMinMaxV(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, bool max);

+ LogicVRegister smaxv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister sminv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister uxtl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister uxtl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister sxtl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister sxtl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister Table(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& ind, bool zero_out_of_bounds,

+ const LogicVRegister* tab1,

+ const LogicVRegister* tab2 = NULL,

+ const LogicVRegister* tab3 = NULL,

+ const LogicVRegister* tab4 = NULL);

+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& ind);

+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& tab2,

+ const LogicVRegister& ind);

+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& tab2,

+ const LogicVRegister& tab3, const LogicVRegister& ind);

+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& tab2,

+ const LogicVRegister& tab3, const LogicVRegister& tab4,

+ const LogicVRegister& ind);

+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& ind);

+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& tab2,

+ const LogicVRegister& ind);

+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& tab2,

+ const LogicVRegister& tab3, const LogicVRegister& ind);

+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& tab, const LogicVRegister& tab2,

+ const LogicVRegister& tab3, const LogicVRegister& tab4,

+ const LogicVRegister& ind);

+ LogicVRegister uaddl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister uaddl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister uaddw(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister uaddw2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister saddl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister saddl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister saddw(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister saddw2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister usubl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister usubl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister usubw(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister usubw2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister ssubl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister ssubl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister ssubw(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister ssubw2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister UMinMax(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ bool max);

+ LogicVRegister umax(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister umin(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister UMinMaxP(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2, bool max);

+ LogicVRegister umaxp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister uminp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister UMinMaxV(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, bool max);

+ LogicVRegister umaxv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister uminv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister trn1(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister trn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister zip1(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister zip2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister uzp1(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister uzp2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister shl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister scvtf(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int fbits,

+ FPRounding rounding_mode);

+ LogicVRegister ucvtf(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int fbits,

+ FPRounding rounding_mode);

+ LogicVRegister sshll(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sshll2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister shll(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister shll2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister ushll(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister ushll2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sli(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sri(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sshr(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister ushr(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister ssra(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister usra(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister srsra(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister ursra(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister suqadd(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister usqadd(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister sqshl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister uqshl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqshlu(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister abs(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister neg(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister ExtractNarrow(VectorFormat vform, LogicVRegister dst,

+ bool dstIsSigned, const LogicVRegister& src,

+ bool srcIsSigned);

+ LogicVRegister xtn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister sqxtn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister uqxtn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister sqxtun(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister AbsDiff(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ bool issigned);

+ LogicVRegister saba(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister uaba(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister shrn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister shrn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister rshrn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister rshrn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister uqshrn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister uqshrn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister uqrshrn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister uqrshrn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqshrn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqshrn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqrshrn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqrshrn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqshrun(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqshrun2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqrshrun(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqrshrun2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, int shift);

+ LogicVRegister sqrdmulh(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2, bool round = true);

+ LogicVRegister sqdmulh(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2);

+#define NEON_3VREG_LOGIC_LIST(V) \

+ V(addhn) \

+ V(addhn2) \

+ V(raddhn) \

+ V(raddhn2) \

+ V(subhn) \

+ V(subhn2) \

+ V(rsubhn) \

+ V(rsubhn2) \

+ V(pmull) \

+ V(pmull2) \

+ V(sabal) \

+ V(sabal2) \

+ V(uabal) \

+ V(uabal2) \

+ V(sabdl) \

+ V(sabdl2) \

+ V(uabdl) \

+ V(uabdl2) \

+ V(smull) \

+ V(smull2) \

+ V(umull) \

+ V(umull2) \

+ V(smlal) \

+ V(smlal2) \

+ V(umlal) \

+ V(umlal2) \

+ V(smlsl) \

+ V(smlsl2) \

+ V(umlsl) \

+ V(umlsl2) \

+ V(sqdmlal) \

+ V(sqdmlal2) \

+ V(sqdmlsl) \

+ V(sqdmlsl2) \

+ V(sqdmull) \

+ V(sqdmull2)

+#define DEFINE_LOGIC_FUNC(FXN) \

+ LogicVRegister FXN(VectorFormat vform, LogicVRegister dst, \

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)

+#undef DEFINE_LOGIC_FUNC

+#define NEON_FP3SAME_LIST(V) \

+ V(fadd, FPAdd, false) \

+ V(fsub, FPSub, true) \

+ V(fmul, FPMul, true) \

+ V(fmulx, FPMulx, true) \

+ V(fdiv, FPDiv, true) \

+ V(fmax, FPMax, false) \

+ V(fmin, FPMin, false) \

+ V(fmaxnm, FPMaxNM, false) \

+ V(fminnm, FPMinNM, false)

+#define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \

+ template <typename T> \

+ LogicVRegister FN(VectorFormat vform, LogicVRegister dst, \

+ const LogicVRegister& src1, const LogicVRegister& src2); \

+ LogicVRegister FN(VectorFormat vform, LogicVRegister dst, \

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)

+#undef DECLARE_NEON_FP_VECTOR_OP

+#define NEON_FPPAIRWISE_LIST(V) \

+ V(faddp, fadd, FPAdd) \

+ V(fmaxp, fmax, FPMax) \

+ V(fmaxnmp, fmaxnm, FPMaxNM) \

+ V(fminp, fmin, FPMin) \

+ V(fminnmp, fminnm, FPMinNM)

+#define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \

+ LogicVRegister FNP(VectorFormat vform, LogicVRegister dst, \

+ const LogicVRegister& src1, const LogicVRegister& src2); \

+ LogicVRegister FNP(VectorFormat vform, LogicVRegister dst, \

+ const LogicVRegister& src);

+ NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)

+#undef DECLARE_NEON_FP_PAIR_OP

+ template <typename T>

+ LogicVRegister frecps(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister frecps(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ template <typename T>

+ LogicVRegister frsqrts(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2);

+ LogicVRegister frsqrts(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1,

+ const LogicVRegister& src2);

+ template <typename T>

+ LogicVRegister fmla(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister fmla(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ template <typename T>

+ LogicVRegister fmls(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister fmls(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister fnmul(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

template <typename T>

- T FPDefaultNaN() const;

+ LogicVRegister fcmp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ Condition cond);

+ LogicVRegister fcmp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ Condition cond);

+ LogicVRegister fabscmp(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2,

+ Condition cond);

+ LogicVRegister fcmp_zero(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, Condition cond);

+ template <typename T>

+ LogicVRegister fneg(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fneg(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ template <typename T>

+ LogicVRegister frecpx(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister frecpx(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ template <typename T>

+ LogicVRegister fabs_(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fabs_(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fabd(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src1, const LogicVRegister& src2);

+ LogicVRegister frint(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, FPRounding rounding_mode,

+ bool inexact_exception = false);

+ LogicVRegister fcvts(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, FPRounding rounding_mode,

+ int fbits = 0);

+ LogicVRegister fcvtu(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, FPRounding rounding_mode,

+ int fbits = 0);

+ LogicVRegister fcvtl(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fcvtl2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fcvtn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fcvtn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fcvtxn(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fcvtxn2(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fsqrt(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister frsqrte(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister frecpe(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, FPRounding rounding);

+ LogicVRegister ursqrte(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister urecpe(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ typedef float (Simulator::*FPMinMaxOp)(float a, float b);

+ LogicVRegister FMinMaxV(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src, FPMinMaxOp Op);

+ LogicVRegister fminv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fmaxv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fminnmv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ LogicVRegister fmaxnmv(VectorFormat vform, LogicVRegister dst,

+ const LogicVRegister& src);

+ template <typename T>

+ T FPRecipSqrtEstimate(T op);

+ template <typename T>

+ T FPRecipEstimate(T op, FPRounding rounding);

+ template <typename T, typename R>

+ R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);

void FPCompare(double val0, double val1);

double FPRoundInt(double value, FPRounding round_mode);

double FPToDouble(float value);

float FPToFloat(double value, FPRounding round_mode);

+ float FPToFloat(float16 value);

+ float16 FPToFloat16(float value, FPRounding round_mode);

+ float16 FPToFloat16(double value, FPRounding round_mode);

+ double recip_sqrt_estimate(double a);

+ double recip_estimate(double a);

+ double FPRecipSqrtEstimate(double a);

+ double FPRecipEstimate(double a);

double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);

double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);

float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);

@@ -743,6 +2132,9 @@ class Simulator : public DecoderVisitor {

T FPMul(T op1, T op2);

template <typename T>

+ T FPMulx(T op1, T op2);

+ template <typename T>

T FPMulAdd(T a, T op1, T op2);

template <typename T>

@@ -751,17 +2143,18 @@ class Simulator : public DecoderVisitor {

template <typename T>

T FPSub(T op1, T op2);

- // Standard NaN processing.

template <typename T>

- T FPProcessNaN(T op);

- bool FPProcessNaNs(Instruction* instr);

+ T FPRecipStepFused(T op1, T op2);

template <typename T>

- T FPProcessNaNs(T op1, T op2);

+ T FPRSqrtStepFused(T op1, T op2);

- template <typename T>

- T FPProcessNaNs3(T op1, T op2, T op3);

+ // This doesn't do anything at the moment. We'll need it if we want support

+ // for cumulative exception bits or floating-point exceptions.

+ void FPProcessException() {}

+ // Standard NaN processing.

+ bool FPProcessNaNs(Instruction* instr);

void CheckStackAlignment();

@@ -773,7 +2166,7 @@ class Simulator : public DecoderVisitor {

static const uint64_t kCallerSavedRegisterCorruptionValue =

0xca11edc0de000000UL;

// This value is a NaN in both 32-bit and 64-bit FP.

- static const uint64_t kCallerSavedFPRegisterCorruptionValue =

+ static const uint64_t kCallerSavedVRegisterCorruptionValue =

0x7ff000007f801000UL;

// This value is a mix of 32/64-bits NaN and "verbose" immediate.

static const uint64_t kDefaultCPURegisterCorruptionValue =

@@ -801,7 +2194,7 @@ class Simulator : public DecoderVisitor {

SimRegister registers_[kNumberOfRegisters];

// Floating point registers

- SimFPRegister fpregisters_[kNumberOfFPRegisters];

+ SimVRegister vregisters_[kNumberOfVRegisters];

// Processor state

// bits[31, 27]: Condition flags N, Z, C, and V.

@@ -959,10 +2352,67 @@ class Simulator : public DecoderVisitor {

private:

void Init(FILE* stream);

+ template <typename T>

+ static T FPDefaultNaN();

+ template <typename T>

+ T FPProcessNaN(T op) {

+ DCHECK(std::isnan(op));

+ return fpcr().DN() ? FPDefaultNaN<T>() : ToQuietNaN(op);

+ }

+ template <typename T>

+ T FPProcessNaNs(T op1, T op2) {

+ if (IsSignallingNaN(op1)) {

+ return FPProcessNaN(op1);

+ } else if (IsSignallingNaN(op2)) {

+ return FPProcessNaN(op2);

+ } else if (std::isnan(op1)) {

+ DCHECK(IsQuietNaN(op1));

+ return FPProcessNaN(op1);

+ } else if (std::isnan(op2)) {

+ DCHECK(IsQuietNaN(op2));

+ return FPProcessNaN(op2);

+ } else {

+ return 0.0;

+ }

+ template <typename T>

+ T FPProcessNaNs3(T op1, T op2, T op3) {

+ if (IsSignallingNaN(op1)) {

+ return FPProcessNaN(op1);

+ } else if (IsSignallingNaN(op2)) {

+ return FPProcessNaN(op2);

+ } else if (IsSignallingNaN(op3)) {

+ return FPProcessNaN(op3);

+ } else if (std::isnan(op1)) {

+ DCHECK(IsQuietNaN(op1));

+ return FPProcessNaN(op1);

+ } else if (std::isnan(op2)) {

+ DCHECK(IsQuietNaN(op2));

+ return FPProcessNaN(op2);

+ } else if (std::isnan(op3)) {

+ DCHECK(IsQuietNaN(op3));

+ return FPProcessNaN(op3);

+ } else {

+ return 0.0;

+ }

int log_parameters_;

Isolate* isolate_;

};

+template <>

+inline double Simulator::FPDefaultNaN<double>() {

+ return kFP64DefaultNaN;

+template <>

+inline float Simulator::FPDefaultNaN<float>() {

+ return kFP32DefaultNaN;

// When running with the simulator transition into simulated execution at this

// point.

« no previous file with comments | « src/arm64/macro-assembler-arm64-inl.h ('k') | src/arm64/simulator-arm64.cc » ('j') | no next file with comments »