Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Unified Diff: src/arm64/simulator-arm64.h

Issue 2812573003: Reland "ARM64: Add NEON support" (Closed)
Patch Set: Add trace directory to gitignore Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm64/macro-assembler-arm64-inl.h ('k') | src/arm64/simulator-arm64.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm64/simulator-arm64.h
diff --git a/src/arm64/simulator-arm64.h b/src/arm64/simulator-arm64.h
index 3016e616e4df8b8d1c41bba8df303f8a0c851125..918f0e96de31d5dc31d4503be1902874957adbae 100644
--- a/src/arm64/simulator-arm64.h
+++ b/src/arm64/simulator-arm64.h
@@ -70,6 +70,239 @@ class SimulatorStack : public v8::internal::AllStatic {
#else // !defined(USE_SIMULATOR)
+// Assemble the specified IEEE-754 components into the target type T and apply
+// the requested rounding. T is an unsigned integer holding the raw encoding,
+// laid out as one sign bit above 'ebits' exponent bits above 'mbits' mantissa bits.
+// sign: 0 = positive, 1 = negative
+// exponent: Unbiased IEEE-754 exponent.
+// mantissa: The mantissa of the input. The top bit (which is not encoded for
+// normal IEEE-754 values) must not be omitted. This bit has the
+// value 'pow(2, exponent)'.
+// round_mode: FPTieEven or FPRoundOdd; any other mode fails a DCHECK.
+// The input value is assumed to be a normalized value. That is, the input may
+// not be infinity or NaN. If the source value is subnormal, it must be
+// normalized before calling this function such that the highest set bit in the
+// mantissa has the value 'pow(2, exponent)'. A zero mantissa returns a zero of the given sign.
+// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
+// calling a templated FPRound.
+template <class T, int ebits, int mbits>
+T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
+ FPRounding round_mode) {
+ static_assert((sizeof(T) * 8) >= (1 + ebits + mbits),
+ "destination type T not large enough");
+ static_assert(sizeof(T) <= sizeof(uint64_t),
+ "maximum size of destination type T is 64 bits");
+ static_assert(std::is_unsigned<T>::value,
+ "destination type T must be unsigned");
+
+ DCHECK((sign == 0) || (sign == 1));
+
+ // Only FPTieEven and FPRoundOdd rounding modes are implemented.
+ DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+
+ // Rounding can promote subnormals to normals, and normals to infinities. For
+ // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
+ // encodable as a float, but rounding based on the low-order mantissa bits
+ // could make it overflow. With ties-to-even rounding, this value would become
+ // an infinity.
+
+ // ---- Rounding Method ----
+ //
+ // The exponent is irrelevant in the rounding operation, so we treat the
+ // lowest-order bit that will fit into the result ('onebit') as having
+ // the value '1'. Similarly, the highest-order bit that won't fit into
+ // the result ('halfbit') has the value '0.5'. The 'point' sits between
+ // 'onebit' and 'halfbit':
+ //
+ // These bits fit into the result.
+ // |---------------------|
+ // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ // ||
+ // / |
+ // / halfbit
+ // onebit
+ //
+ // For subnormal outputs, the range of representable bits is smaller and
+ // the position of onebit and halfbit depends on the exponent of the
+ // input, but the method is otherwise similar.
+ //
+ // onebit(frac)
+ // |
+ // | halfbit(frac) halfbit(adjusted)
+ // | / /
+ // | | |
+ // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
+ // 0b00.0... -> 0b00.0... -> 0b00
+ // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
+ // 0b00.1... -> 0b00.1... -> 0b01
+ // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
+ // 0b01.0... -> 0b01.0... -> 0b01
+ // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
+ // 0b01.1... -> 0b01.1... -> 0b10
+ // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
+ // 0b10.0... -> 0b10.0... -> 0b10
+ // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
+ // 0b10.1... -> 0b10.1... -> 0b11
+ // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
+ // ... / | / |
+ // / | / |
+ // / |
+ // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
+ //
+ // mantissa = (mantissa >> shift) + halfbit(adjusted);
+
+ const int mantissa_offset = 0;
+ const int exponent_offset = mantissa_offset + mbits;
+ const int sign_offset = exponent_offset + ebits;
+ DCHECK_EQ(sign_offset, static_cast<int>(sizeof(T) * 8 - 1));
+
+ // Bail out early for zero inputs.
+ if (mantissa == 0) {
+ return static_cast<T>(sign << sign_offset);
+ }
+
+ // If all bits in the exponent are set, the value is infinite or NaN.
+ // This is true for all binary IEEE-754 formats.
+ const int infinite_exponent = (1 << ebits) - 1;
+ const int max_normal_exponent = infinite_exponent - 1;
+
+ // Apply the exponent bias to encode it for the result. Doing this early makes
+ // it easy to detect values that will be infinite or subnormal.
+ exponent += max_normal_exponent >> 1;
+
+ if (exponent > max_normal_exponent) {
+ // Overflow: the input is too large for the result type to represent.
+ if (round_mode == FPTieEven) {
+ // FPTieEven rounding mode handles overflows using infinities.
+ exponent = infinite_exponent;
+ mantissa = 0;
+ } else {
+ DCHECK_EQ(round_mode, FPRoundOdd);
+ // FPRoundOdd rounding mode handles overflows using the largest magnitude
+ // normal number.
+ exponent = max_normal_exponent;
+ mantissa = (UINT64_C(1) << exponent_offset) - 1;
+ }
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ (mantissa << mantissa_offset));
+ }
+
+ // Calculate the shift required to move the top mantissa bit to the proper
+ // place in the destination type.
+ const int highest_significant_bit = 63 - CountLeadingZeros(mantissa, 64);
+ int shift = highest_significant_bit - mbits;  // > 0: shift right and round; <= 0: shift left, exact.
+
+ if (exponent <= 0) {
+ // The output will be subnormal (before rounding).
+ // For subnormal outputs, the shift must be adjusted by the exponent. The +1
+ // is necessary because the exponent of a subnormal value (encoded as 0) is
+ // the same as the exponent of the smallest normal value (encoded as 1).
+ shift += -exponent + 1;
+
+ // Handle inputs that would produce a zero output.
+ //
+ // Shifts higher than highest_significant_bit+1 will always produce a zero
+ // result. A shift of exactly highest_significant_bit+1 might produce a
+ // non-zero result after rounding.
+ if (shift > (highest_significant_bit + 1)) {
+ if (round_mode == FPTieEven) {
+ // The result will always be +/-0.0.
+ return static_cast<T>(sign << sign_offset);
+ } else {
+ DCHECK_EQ(round_mode, FPRoundOdd);
+ DCHECK_NE(mantissa, 0U);
+ // For FPRoundOdd, if the mantissa is too small to represent and
+ // non-zero return the next "odd" value.
+ return static_cast<T>((sign << sign_offset) | 1);
+ }
+ }
+
+ // Properly encode the exponent for a subnormal output.
+ exponent = 0;
+ } else {
+ // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
+ // normal values.
+ mantissa &= ~(UINT64_C(1) << highest_significant_bit);
+ }
+
+ if (shift > 0) {
+ if (round_mode == FPTieEven) {
+ // We have to shift the mantissa to the right. Some precision is lost, so
+ // we need to apply rounding.
+ uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
+ uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
+ uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
+ uint64_t adjusted = mantissa - adjustment;
+ T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
+
+ T result =
+ static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+
+ // A very large mantissa can overflow during rounding. If this happens,
+ // the exponent should be incremented and the mantissa set to 1.0
+ // (encoded as 0). Applying halfbit_adjusted after assembling the float
+ // has the nice side-effect that this case is handled for free.
+ //
+ // This also handles cases where a very large finite value overflows to
+ // infinity, or where a very large subnormal value overflows to become
+ // normal.
+ return result + halfbit_adjusted;
+ } else {
+ DCHECK_EQ(round_mode, FPRoundOdd);
+ // If any bits at position halfbit or below are set, onebit (ie. the
+ // bottom bit of the resulting mantissa) must be set.
+ uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
+ if (fractional_bits != 0) {
+ mantissa |= UINT64_C(1) << shift;
+ }
+
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+ }
+ } else {
+ // We have to shift the mantissa to the left (or not at all). The input
+ // mantissa is exactly representable in the output mantissa, so apply no
+ // rounding correction.
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa << -shift) << mantissa_offset));
+ }
+}
+
+// Representation of memory: static, typed Read/Write helpers that untag the address and then memcpy the value.
+class SimMemory {
+ public:
+ template <typename T>
+ static T AddressUntag(T address) {
+ // Return 'address' with the kAddressTagMask bits cleared. A C-style cast is used because T may be
+ // a pointer or an integer, and reinterpret_cast can't cast one integral type to another.
+ uint64_t bits = (uint64_t)address;
+ return (T)(bits & ~kAddressTagMask);
+ }
+
+ template <typename T, typename A>
+ static T Read(A address) {  // Load a T from the untagged 'address'.
+ T value;
+ address = AddressUntag(address);
+ DCHECK((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));  // Only 1, 2, 4, 8 and 16 byte accesses are accepted.
+ memcpy(&value, reinterpret_cast<const char*>(address), sizeof(value));
+ return value;
+ }
+
+ template <typename T, typename A>
+ static void Write(A address, T value) {  // Store 'value' at the untagged 'address'.
+ address = AddressUntag(address);
+ DCHECK((sizeof(value) == 1) || (sizeof(value) == 2) ||
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));  // Only 1, 2, 4, 8 and 16 byte accesses are accepted.
+ memcpy(reinterpret_cast<char*>(address), &value, sizeof(value));
+ }
+};
// The proper way to initialize a simulated system register (such as NZCV) is as
// follows:
@@ -125,29 +358,330 @@ class SimSystemRegister {
// Represent a register (r0-r31, v0-v31).
+template <int kSizeInBytes>
class SimRegisterBase {
public:
+ // Overwrite the whole register with new_value and mark it as written. T may
+ // be narrower than the register, in which case the upper bytes are cleared.
template<typename T>
void Set(T new_value) {
- value_ = 0;
+ static_assert(sizeof(new_value) <= kSizeInBytes,
+ "Size of new_value must be <= size of template type.");
+ if (sizeof(new_value) < kSizeInBytes) {
+ // All AArch64 registers are zero-extending.
+ memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));
+ }
memcpy(&value_, &new_value, sizeof(T));
+ NotifyRegisterWrite();
}
- template<typename T>
- T Get() const {
+ // Insert a typed value into a register, leaving the rest of the register
+ // unchanged. The lane parameter indicates where in the register the value
+ // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where
+ // 0 represents the least significant bits. The register is marked as
+ // written.
+ template <typename T>
+ void Insert(int lane, T new_value) {
+ DCHECK_GE(lane, 0);
+ DCHECK_LE(sizeof(new_value) + (lane * sizeof(new_value)),
+ static_cast<unsigned>(kSizeInBytes));
+ memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value));
+ NotifyRegisterWrite();
+ }
+
+ // Read lane 'lane' of the register as a T. Lanes are sizeof(T) bytes wide
+ // and lane 0 starts at the first byte of the register storage.
+ template <typename T>
+ T Get(int lane = 0) const {
T result;
- memcpy(&result, &value_, sizeof(T));
+ DCHECK_GE(lane, 0);
+ DCHECK_LE(sizeof(result) + (lane * sizeof(result)),
+ static_cast<unsigned>(kSizeInBytes));
+ memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));
return result;
}
+ // TODO(all): Make this return a map of updated bytes, so that we can
+ // highlight updated lanes for load-and-insert. (That never happens for scalar
+ // code, but NEON has some instructions that can update individual lanes.)
+ bool WrittenSinceLastLog() const { return written_since_last_log_; }
+
+ void NotifyRegisterLogged() { written_since_last_log_ = false; }
+
protected:
- int64_t value_;
+ uint8_t value_[kSizeInBytes];
+
+ // Helpers to aid with register tracing.
+ // Initialised to false so that WrittenSinceLastLog() never reads an
+ // indeterminate bool on a register that has not been written yet.
+ bool written_since_last_log_ = false;
+
+ void NotifyRegisterWrite() { written_since_last_log_ = true; }
};
+typedef SimRegisterBase<kXRegSize> SimRegister; // r0-r31
+typedef SimRegisterBase<kQRegSize> SimVRegister; // v0-v31
+
+// Representation of a vector register, with typed getters and setters for lanes
+// and additional information to represent lane state. The object wraps a
+// reference to a SimVRegister and keeps per-lane saturation and rounding
+// flags alongside it.
+class LogicVRegister {
+ public:
+ // Wrap 'other' and reset every saturation and rounding flag.
+ inline LogicVRegister(SimVRegister& other) // NOLINT
+ : register_(other) {
+ for (unsigned i = 0; i < arraysize(saturated_); i++) {
+ saturated_[i] = kNotSaturated;
+ }
+ for (unsigned i = 0; i < arraysize(round_); i++) {
+ round_[i] = false;
+ }
+ }
+
+ // Read lane 'index' as a signed integer of the lane size given by vform,
+ // widened to int64_t.
+ int64_t Int(VectorFormat vform, int index) const {
+ int64_t element;
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8:
+ element = register_.Get<int8_t>(index);
+ break;
+ case 16:
+ element = register_.Get<int16_t>(index);
+ break;
+ case 32:
+ element = register_.Get<int32_t>(index);
+ break;
+ case 64:
+ element = register_.Get<int64_t>(index);
+ break;
+ default:
+ UNREACHABLE();
+ return 0;
+ }
+ return element;
+ }
+
+ // Read lane 'index' as an unsigned integer of the lane size given by vform,
+ // widened to uint64_t.
+ uint64_t Uint(VectorFormat vform, int index) const {
+ uint64_t element;
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8:
+ element = register_.Get<uint8_t>(index);
+ break;
+ case 16:
+ element = register_.Get<uint16_t>(index);
+ break;
+ case 32:
+ element = register_.Get<uint32_t>(index);
+ break;
+ case 64:
+ element = register_.Get<uint64_t>(index);
+ break;
+ default:
+ UNREACHABLE();
+ return 0;
+ }
+ return element;
+ }
+
+ // As Uint(), but with the lane shifted up so that its top bit is bit 63.
+ uint64_t UintLeftJustified(VectorFormat vform, int index) const {
+ return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform));
+ }
+
+ // The bits of UintLeftJustified() copied into an int64_t.
+ int64_t IntLeftJustified(VectorFormat vform, int index) const {
+ uint64_t value = UintLeftJustified(vform, index);
+ int64_t result;
+ memcpy(&result, &value, sizeof(result));
+ return result;
+ }
+
+ // Truncate 'value' to the lane size given by vform and insert it into lane
+ // 'index'; the other lanes are left unchanged.
+ void SetInt(VectorFormat vform, int index, int64_t value) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8:
+ register_.Insert(index, static_cast<int8_t>(value));
+ break;
+ case 16:
+ register_.Insert(index, static_cast<int16_t>(value));
+ break;
+ case 32:
+ register_.Insert(index, static_cast<int32_t>(value));
+ break;
+ case 64:
+ register_.Insert(index, static_cast<int64_t>(value));
+ break;
+ default:
+ UNREACHABLE();
+ return;
+ }
+ }
+
+ // Clear the bytes above the vform register size, then set every lane from
+ // src[0 .. LaneCountFromFormat(vform) - 1].
+ void SetIntArray(VectorFormat vform, const int64_t* src) const {
+ ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SetInt(vform, i, src[i]);
+ }
+ }
+
+ // Unsigned counterpart of SetInt().
+ void SetUint(VectorFormat vform, int index, uint64_t value) const {
+ switch (LaneSizeInBitsFromFormat(vform)) {
+ case 8:
+ register_.Insert(index, static_cast<uint8_t>(value));
+ break;
+ case 16:
+ register_.Insert(index, static_cast<uint16_t>(value));
+ break;
+ case 32:
+ register_.Insert(index, static_cast<uint32_t>(value));
+ break;
+ case 64:
+ register_.Insert(index, static_cast<uint64_t>(value));
+ break;
+ default:
+ UNREACHABLE();
+ return;
+ }
+ }
+
+ // Unsigned counterpart of SetIntArray().
+ void SetUintArray(VectorFormat vform, const uint64_t* src) const {
+ ClearForWrite(vform);
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SetUint(vform, i, src[i]);
+ }
+ }
+
+ void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const;
+
+ void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const;
+
+ // Read lane 'index' as a T; lanes are sizeof(T) bytes wide.
+ template <typename T>
+ T Float(int index) const {
+ return register_.Get<T>(index);
+ }
+
+ // Insert 'value' into lane 'index'; lanes are sizeof(T) bytes wide.
+ template <typename T>
+ void SetFloat(int index, T value) const {
+ register_.Insert(index, value);
+ }
+
+ // When setting a result in a register of size less than Q, the top bits of
+ // the Q register must be cleared.
+ void ClearForWrite(VectorFormat vform) const {
+ unsigned size = RegisterSizeInBytesFromFormat(vform);
+ for (unsigned i = size; i < kQRegSize; i++) {
+ SetUint(kFormat16B, i, 0);
+ }
+ }
-typedef SimRegisterBase SimRegister; // r0-r31
-typedef SimRegisterBase SimFPRegister; // v0-v31
+ // Saturation state for each lane of a vector.
+ enum Saturation {
+ kNotSaturated = 0,
+ kSignedSatPositive = 1 << 0,
+ kSignedSatNegative = 1 << 1,
+ kSignedSatMask = kSignedSatPositive | kSignedSatNegative,
+ kSignedSatUndefined = kSignedSatMask,
+ kUnsignedSatPositive = 1 << 2,
+ kUnsignedSatNegative = 1 << 3,
+ kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative,
+ kUnsignedSatUndefined = kUnsignedSatMask
+ };
+
+ // Getters for saturation state.
+ Saturation GetSignedSaturation(int index) {
+ return static_cast<Saturation>(saturated_[index] & kSignedSatMask);
+ }
+
+ Saturation GetUnsignedSaturation(int index) {
+ return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask);
+ }
+
+ // Setters for saturation state.
+ void ClearSat(int index) { saturated_[index] = kNotSaturated; }
+
+ void SetSignedSat(int index, bool positive) {
+ SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative);
+ }
+ void SetUnsignedSat(int index, bool positive) {
+ SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative);
+ }
+
+ // OR 'sat' into the lane's saturation state. The DCHECKs reject a 'sat'
+ // argument that has both the positive and negative bit of one kind set.
+ void SetSatFlag(int index, Saturation sat) {
+ saturated_[index] = static_cast<Saturation>(saturated_[index] | sat);
+ DCHECK_NE(sat & kUnsignedSatMask, kUnsignedSatUndefined);
+ DCHECK_NE(sat & kSignedSatMask, kSignedSatUndefined);
+ }
+
+ // Saturate lanes of a vector based on saturation state.
+ LogicVRegister& SignedSaturate(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ Saturation sat = GetSignedSaturation(i);
+ if (sat == kSignedSatPositive) {
+ SetInt(vform, i, MaxIntFromFormat(vform));
+ } else if (sat == kSignedSatNegative) {
+ SetInt(vform, i, MinIntFromFormat(vform));
+ }
+ }
+ return *this;
+ }
+
+ LogicVRegister& UnsignedSaturate(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ Saturation sat = GetUnsignedSaturation(i);
+ if (sat == kUnsignedSatPositive) {
+ SetUint(vform, i, MaxUintFromFormat(vform));
+ } else if (sat == kUnsignedSatNegative) {
+ SetUint(vform, i, 0);
+ }
+ }
+ return *this;
+ }
+
+ // Getter for rounding state.
+ bool GetRounding(int index) { return round_[index]; }
+
+ // Setter for rounding state.
+ void SetRounding(int index, bool round) { round_[index] = round; }
+
+ // Round lanes of a vector based on rounding state.
+ LogicVRegister& Round(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SetUint(vform, i, Uint(vform, i) + (GetRounding(i) ? 1 : 0));
+ }
+ return *this;
+ }
+
+ // Unsigned halve lanes of a vector, and use the saturation state to set the
+ // top bit. The bit shifted out of each lane is recorded as its rounding
+ // state.
+ LogicVRegister& Uhalve(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ uint64_t val = Uint(vform, i);
+ SetRounding(i, (val & 1) == 1);
+ val >>= 1;
+ if (GetUnsignedSaturation(i) != kNotSaturated) {
+ // If the operation causes unsigned saturation, the bit shifted into the
+ // most significant bit must be set.
+ val |= (MaxUintFromFormat(vform) >> 1) + 1;
+ }
+ // 'val' is unsigned, so store it through the unsigned setter rather than
+ // relying on an implicit conversion to int64_t.
+ SetUint(vform, i, val);
+ }
+ return *this;
+ }
+
+ // Signed halve lanes of a vector, and use the carry state to set the top bit.
+ // The bit shifted out of each lane is recorded as its rounding state.
+ LogicVRegister& Halve(VectorFormat vform) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ int64_t val = Int(vform, i);
+ SetRounding(i, (val & 1) == 1);
+ val >>= 1;
+ if (GetSignedSaturation(i) != kNotSaturated) {
+ // If the operation causes signed saturation, the sign bit must be
+ // inverted.
+ val ^= (MaxUintFromFormat(vform) >> 1) + 1;
+ }
+ SetInt(vform, i, val);
+ }
+ return *this;
+ }
+
+ private:
+ SimVRegister& register_;
+
+ // Allocate one saturation state entry per lane; largest register is type Q,
+ // and lanes can be a minimum of one byte wide.
+ Saturation saturated_[kQRegSize];
+
+ // Allocate one rounding state entry per lane.
+ bool round_[kQRegSize];
+};
class Simulator : public DecoderVisitor {
public:
@@ -315,6 +849,7 @@ class Simulator : public DecoderVisitor {
CheckBreakNext();
Decode(pc_);
increment_pc();
+ LogAllWrittenRegisters();
CheckBreakpoints();
}
@@ -333,7 +868,7 @@ class Simulator : public DecoderVisitor {
//
template<typename T>
T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const {
- DCHECK(code < kNumberOfRegisters);
+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfRegisters));
if (IsZeroRegister(code, r31mode)) {
return 0;
}
@@ -349,6 +884,8 @@ class Simulator : public DecoderVisitor {
return reg<int64_t>(code, r31mode);
}
+ enum RegLogMode { LogRegWrites, NoRegLog };
+
// Write 'value' into an integer register. The value is zero-extended. This
// behaviour matches AArch64 register writes.
template<typename T>
@@ -373,7 +910,7 @@ class Simulator : public DecoderVisitor {
template <typename T>
void set_reg_no_log(unsigned code, T value,
Reg31Mode r31mode = Reg31IsZeroRegister) {
- DCHECK(code < kNumberOfRegisters);
+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfRegisters));
if (!IsZeroRegister(code, r31mode)) {
registers_[code].Set(value);
}
@@ -392,16 +929,39 @@ class Simulator : public DecoderVisitor {
// Commonly-used special cases.
template<typename T>
void set_lr(T value) {
- DCHECK(sizeof(T) == kPointerSize);
+ DCHECK_EQ(sizeof(T), static_cast<unsigned>(kPointerSize));
set_reg(kLinkRegCode, value);
}
template<typename T>
void set_sp(T value) {
- DCHECK(sizeof(T) == kPointerSize);
+ DCHECK_EQ(sizeof(T), static_cast<unsigned>(kPointerSize));
set_reg(31, value, Reg31IsStackPointer);
}
+ // Vector register accessors.
+ // These are equivalent to the integer register accessors, but for vector
+ // registers.
+
+ // A structure for representing a 128-bit Q register.
+ struct qreg_t {
+ uint8_t val[kQRegSize];
+ };
+
+ // Basic accessor: read the register as the specified type.
+ template <typename T>
+ T vreg(unsigned code) const {
+ static_assert((sizeof(T) == kBRegSize) || (sizeof(T) == kHRegSize) ||
+ (sizeof(T) == kSRegSize) || (sizeof(T) == kDRegSize) ||
+ (sizeof(T) == kQRegSize),
+ "Template type must match size of register.");
+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters));
+
+ return vregisters_[code].Get<T>();
+ }
+
+ inline SimVRegister& vreg(unsigned code) { return vregisters_[code]; }
+
int64_t sp() { return xreg(31, Reg31IsStackPointer); }
int64_t jssp() { return xreg(kJSSPCode, Reg31IsStackPointer); }
int64_t fp() {
@@ -411,87 +971,135 @@ class Simulator : public DecoderVisitor {
Address get_sp() const { return reg<Address>(31, Reg31IsStackPointer); }
- template<typename T>
- T fpreg(unsigned code) const {
- DCHECK(code < kNumberOfRegisters);
- return fpregisters_[code].Get<T>();
- }
+ // Common specialized accessors for the vreg() template.
+ uint8_t breg(unsigned code) const { return vreg<uint8_t>(code); }
- // Common specialized accessors for the fpreg() template.
- float sreg(unsigned code) const {
- return fpreg<float>(code);
- }
+ float hreg(unsigned code) const { return vreg<uint16_t>(code); }  // NOTE(review): the raw uint16_t is converted numerically to float, not decoded as a half-precision value -- confirm this is intended.
- uint32_t sreg_bits(unsigned code) const {
- return fpreg<uint32_t>(code);
- }
+ float sreg(unsigned code) const { return vreg<float>(code); }
- double dreg(unsigned code) const {
- return fpreg<double>(code);
- }
+ uint32_t sreg_bits(unsigned code) const { return vreg<uint32_t>(code); }
- uint64_t dreg_bits(unsigned code) const {
- return fpreg<uint64_t>(code);
- }
+ double dreg(unsigned code) const { return vreg<double>(code); }
+
+ uint64_t dreg_bits(unsigned code) const { return vreg<uint64_t>(code); }
+
+ qreg_t qreg(unsigned code) const { return vreg<qreg_t>(code); }
+
+ // As above, with parameterized size and return type: 'size' selects a kSRegSize or kDRegSize read (any other
+ // value hits UNREACHABLE()). The raw bits are zero-extended to 64 bits, then truncated to sizeof(T) as required.
+ template <typename T>
+ T vreg(unsigned size, unsigned code) const {
+ uint64_t raw = 0;
+ T result;
- double fpreg(unsigned size, unsigned code) const {
switch (size) {
- case kSRegSizeInBits: return sreg(code);
- case kDRegSizeInBits: return dreg(code);
+ case kSRegSize:
+ raw = vreg<uint32_t>(code);
+ break;
+ case kDRegSize:
+ raw = vreg<uint64_t>(code);
+ break;
default:
UNREACHABLE();
- return 0.0;
+ break;
}
+
+ static_assert(sizeof(result) <= sizeof(raw),
+ "Template type must be <= 64 bits.");
+ // Copy the first sizeof(result) bytes of raw. This assumes a little-endian host, where those are the low-order bytes.
+ memcpy(&result, &raw, sizeof(result));
+ return result;
}
// Write 'value' into a floating-point register. The value is zero-extended.
// This behaviour matches AArch64 register writes.
- template<typename T>
- void set_fpreg(unsigned code, T value) {
- set_fpreg_no_log(code, value);
-
- if (sizeof(value) <= kSRegSize) {
- LogFPRegister(code, kPrintSRegValue);
- } else {
- LogFPRegister(code, kPrintDRegValue);
+ template <typename T>
+ void set_vreg(unsigned code, T value, RegLogMode log_mode = LogRegWrites) {
+ static_assert(
+ (sizeof(value) == kBRegSize) || (sizeof(value) == kHRegSize) ||
+ (sizeof(value) == kSRegSize) || (sizeof(value) == kDRegSize) ||
+ (sizeof(value) == kQRegSize),
+ "Template type must match size of register.");
+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters));
+ vregisters_[code].Set(value);
+
+ if (log_mode == LogRegWrites) {
+ LogVRegister(code, GetPrintRegisterFormat(value));
}
}
- // Common specialized accessors for the set_fpreg() template.
- void set_sreg(unsigned code, float value) {
- set_fpreg(code, value);
+ // Common specialized accessors for the set_vreg() template.
+ void set_breg(unsigned code, int8_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_hreg(unsigned code, int16_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_sreg(unsigned code, float value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
+ }
+
+ void set_sreg_bits(unsigned code, uint32_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
}
- void set_sreg_bits(unsigned code, uint32_t value) {
- set_fpreg(code, value);
+ void set_dreg(unsigned code, double value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
}
- void set_dreg(unsigned code, double value) {
- set_fpreg(code, value);
+ void set_dreg_bits(unsigned code, uint64_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
}
- void set_dreg_bits(unsigned code, uint64_t value) {
- set_fpreg(code, value);
+ void set_qreg(unsigned code, qreg_t value,
+ RegLogMode log_mode = LogRegWrites) {
+ set_vreg(code, value, log_mode);
}
// As above, but don't automatically log the register update.
template <typename T>
- void set_fpreg_no_log(unsigned code, T value) {
- DCHECK((sizeof(value) == kDRegSize) || (sizeof(value) == kSRegSize));
- DCHECK(code < kNumberOfFPRegisters);
- fpregisters_[code].Set(value);
+ void set_vreg_no_log(unsigned code, T value) {
+ STATIC_ASSERT((sizeof(value) == kBRegSize) ||
+ (sizeof(value) == kHRegSize) ||
+ (sizeof(value) == kSRegSize) ||
+ (sizeof(value) == kDRegSize) || (sizeof(value) == kQRegSize));
+ DCHECK_LT(code, static_cast<unsigned>(kNumberOfVRegisters));
+ vregisters_[code].Set(value);
+ }
+
+ void set_breg_no_log(unsigned code, uint8_t value) {
+ set_vreg_no_log(code, value);
+ }
+
+ void set_hreg_no_log(unsigned code, uint16_t value) {
+ set_vreg_no_log(code, value);
}
void set_sreg_no_log(unsigned code, float value) {
- set_fpreg_no_log(code, value);
+ set_vreg_no_log(code, value);
}
void set_dreg_no_log(unsigned code, double value) {
- set_fpreg_no_log(code, value);
+ set_vreg_no_log(code, value);
+ }
+
+ void set_qreg_no_log(unsigned code, qreg_t value) {
+ set_vreg_no_log(code, value);
}
SimSystemRegister& nzcv() { return nzcv_; }
SimSystemRegister& fpcr() { return fpcr_; }
+ FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); }
+ bool DN() { return fpcr_.DN() != 0; }
// Debug helpers
@@ -518,66 +1126,195 @@ class Simulator : public DecoderVisitor {
// Print all registers of the specified types.
void PrintRegisters();
- void PrintFPRegisters();
+ void PrintVRegisters();
void PrintSystemRegisters();
- // Like Print* (above), but respect log_parameters().
- void LogSystemRegisters() {
- if (log_parameters() & LOG_SYS_REGS) PrintSystemRegisters();
+ // As above, but only print the registers that have been updated.
+ void PrintWrittenRegisters();
+ void PrintWrittenVRegisters();
+
+ // As above, but respect LOG_REGS and LOG_VREGS.
+ void LogWrittenRegisters() {
+ if (log_parameters() & LOG_REGS) PrintWrittenRegisters();
+ }
+ void LogWrittenVRegisters() {
+ if (log_parameters() & LOG_VREGS) PrintWrittenVRegisters();
+ }
+ void LogAllWrittenRegisters() {
+ LogWrittenRegisters();
+ LogWrittenVRegisters();
+ }
+
+ // Specify relevant register formats for Print(V)Register and related helpers.
+ enum PrintRegisterFormat {
+ // The lane size.
+ kPrintRegLaneSizeB = 0 << 0,
+ kPrintRegLaneSizeH = 1 << 0,
+ kPrintRegLaneSizeS = 2 << 0,
+ kPrintRegLaneSizeW = kPrintRegLaneSizeS,
+ kPrintRegLaneSizeD = 3 << 0,
+ kPrintRegLaneSizeX = kPrintRegLaneSizeD,
+ kPrintRegLaneSizeQ = 4 << 0,
+
+ kPrintRegLaneSizeOffset = 0,
+ kPrintRegLaneSizeMask = 7 << 0,
+
+ // The lane count.
+ kPrintRegAsScalar = 0,
+ kPrintRegAsDVector = 1 << 3,
+ kPrintRegAsQVector = 2 << 3,
+
+ kPrintRegAsVectorMask = 3 << 3,
+
+ // Indicate floating-point format lanes. (This flag is only supported for S-
+ // and D-sized lanes.)
+ kPrintRegAsFP = 1 << 5,
+
+ // Supported combinations.
+
+ kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar,
+ kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar,
+ kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
+
+ kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar,
+ kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector,
+ kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector,
+ kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar,
+ kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector,
+ kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector,
+ kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar,
+ kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector,
+ kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector,
+ kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP,
+ kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP,
+ kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar,
+ kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector,
+ kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP,
+ kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | kPrintRegAsFP,
+ kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar
+ };
+
+ unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) {
+ return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset;
}
- void LogRegisters() {
- if (log_parameters() & LOG_REGS) PrintRegisters();
+
+ unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) {
+ return 1 << GetPrintRegLaneSizeInBytesLog2(format);
}
- void LogFPRegisters() {
- if (log_parameters() & LOG_FP_REGS) PrintFPRegisters();
+
+ unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) {
+ if (format & kPrintRegAsDVector) return kDRegSizeLog2;
+ if (format & kPrintRegAsQVector) return kQRegSizeLog2;
+
+ // Scalar types.
+ return GetPrintRegLaneSizeInBytesLog2(format);
}
- // Specify relevant register sizes, for PrintFPRegister.
- //
- // These values are bit masks; they can be combined in case multiple views of
- // a machine register are interesting.
- enum PrintFPRegisterSizes {
- kPrintDRegValue = 1 << kDRegSize,
- kPrintSRegValue = 1 << kSRegSize,
- kPrintAllFPRegValues = kPrintDRegValue | kPrintSRegValue
- };
+ unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) {
+ return 1 << GetPrintRegSizeInBytesLog2(format);
+ }
+
+ unsigned GetPrintRegLaneCount(PrintRegisterFormat format) {
+ unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format);
+ unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format);
+ DCHECK_GE(reg_size_log2, lane_size_log2);
+ return 1 << (reg_size_log2 - lane_size_log2);
+ }
+
+ template <typename T>
+ PrintRegisterFormat GetPrintRegisterFormat(T value) {
+ return GetPrintRegisterFormatForSize(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(double value) {
+ static_assert(sizeof(value) == kDRegSize,
+ "D register must be size of double.");
+ return GetPrintRegisterFormatForSizeFP(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(float value) {
+ static_assert(sizeof(value) == kSRegSize,
+ "S register must be size of float.");
+ return GetPrintRegisterFormatForSizeFP(sizeof(value));
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
+ PrintRegisterFormat GetPrintRegisterFormatFP(VectorFormat vform);
+
+ PrintRegisterFormat GetPrintRegisterFormatForSize(size_t reg_size,
+ size_t lane_size);
+
+ PrintRegisterFormat GetPrintRegisterFormatForSize(size_t size) {
+ return GetPrintRegisterFormatForSize(size, size);
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormatForSizeFP(size_t size) {  // Map kDRegSize / kSRegSize to kPrintDReg / kPrintSReg.
+ switch (size) {
+ default:
+ UNREACHABLE();  // No break or return follows: if UNREACHABLE() returns, control continues into the kDRegSize case.
+ case kDRegSize:
+ return kPrintDReg;
+ case kSRegSize:
+ return kPrintSReg;
+ }
+ }
+
+ PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) {
+ if ((GetPrintRegLaneSizeInBytes(format) == kSRegSize) ||
+ (GetPrintRegLaneSizeInBytes(format) == kDRegSize)) {
+ return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP);
+ }
+ return format;
+ }
// Print individual register values (after update).
void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer);
- void PrintFPRegister(unsigned code,
- PrintFPRegisterSizes sizes = kPrintAllFPRegValues);
+ // Prints vector register |code| according to |format|.
+ void PrintVRegister(unsigned code, PrintRegisterFormat format);
void PrintSystemRegister(SystemRegister id);
// Like Print* (above), but respect log_parameters().
void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) {
if (log_parameters() & LOG_REGS) PrintRegister(code, r31mode);
}
- void LogFPRegister(unsigned code,
- PrintFPRegisterSizes sizes = kPrintAllFPRegValues) {
- if (log_parameters() & LOG_FP_REGS) PrintFPRegister(code, sizes);
+ // Calls PrintVRegister() only when the LOG_VREGS bit is set in
+ // log_parameters().
+ void LogVRegister(unsigned code, PrintRegisterFormat format) {
+ if ((log_parameters() & LOG_VREGS) == 0) return;
+ PrintVRegister(code, format);
 }
void LogSystemRegister(SystemRegister id) {
if (log_parameters() & LOG_SYS_REGS) PrintSystemRegister(id);
}
// Print memory accesses.
- void PrintRead(uintptr_t address, size_t size, unsigned reg_code);
- void PrintReadFP(uintptr_t address, size_t size, unsigned reg_code);
- void PrintWrite(uintptr_t address, size_t size, unsigned reg_code);
- void PrintWriteFP(uintptr_t address, size_t size, unsigned reg_code);
+ void PrintRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format);
+ void PrintWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format);
+ void PrintVRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane);
+ void PrintVWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane);
// Like Print* (above), but respect log_parameters().
- void LogRead(uintptr_t address, size_t size, unsigned reg_code) {
- if (log_parameters() & LOG_REGS) PrintRead(address, size, reg_code);
- }
- void LogReadFP(uintptr_t address, size_t size, unsigned reg_code) {
- if (log_parameters() & LOG_FP_REGS) PrintReadFP(address, size, reg_code);
- }
- void LogWrite(uintptr_t address, size_t size, unsigned reg_code) {
- if (log_parameters() & LOG_WRITE) PrintWrite(address, size, reg_code);
+ // Calls PrintRead() only when the LOG_REGS bit is set in log_parameters().
+ void LogRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format) {
+ const bool trace_enabled = (log_parameters() & LOG_REGS) != 0;
+ if (!trace_enabled) return;
+ PrintRead(address, reg_code, format);
+ }
+ // Calls PrintWrite() only when the LOG_WRITE bit is set in log_parameters().
+ void LogWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format) {
+ const bool trace_enabled = (log_parameters() & LOG_WRITE) != 0;
+ if (!trace_enabled) return;
+ PrintWrite(address, reg_code, format);
+ }
+ // Calls PrintVRead() only when the LOG_VREGS bit is set in log_parameters().
+ // |lane| defaults to 0.
+ void LogVRead(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane = 0) {
+ if ((log_parameters() & LOG_VREGS) == 0) return;
+ PrintVRead(address, reg_code, format, lane);
 }
- void LogWriteFP(uintptr_t address, size_t size, unsigned reg_code) {
- if (log_parameters() & LOG_WRITE) PrintWriteFP(address, size, reg_code);
+ // Calls PrintVWrite() only when the LOG_WRITE bit is set in log_parameters()
+ // (the same bit that gates LogWrite). |lane| defaults to 0.
+ void LogVWrite(uintptr_t address, unsigned reg_code,
+ PrintRegisterFormat format, unsigned lane = 0) {
+ if ((log_parameters() & LOG_WRITE) == 0) return;
+ PrintVWrite(address, reg_code, format, lane);
 }
int log_parameters() { return log_parameters_; }
@@ -596,6 +1333,14 @@ class Simulator : public DecoderVisitor {
}
}
+ // Helper functions for register tracing.
+ void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
+ int size_in_bytes = kXRegSize);
+ void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSize,
+ int lsb = 0);
+ void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes,
+ int lane_count = 1, int rightmost_lane = 0);
+
static inline const char* WRegNameForCode(unsigned code,
Reg31Mode mode = Reg31IsZeroRegister);
static inline const char* XRegNameForCode(unsigned code,
@@ -670,6 +1415,10 @@ class Simulator : public DecoderVisitor {
void LoadStoreWriteBack(unsigned addr_reg,
int64_t offset,
AddrMode addrmode);
+ void NEONLoadStoreMultiStructHelper(const Instruction* instr,
+ AddrMode addr_mode);
+ void NEONLoadStoreSingleStructHelper(const Instruction* instr,
+ AddrMode addr_mode);
void CheckMemoryAccess(uintptr_t address, uintptr_t stack);
// Memory read helpers.
@@ -677,7 +1426,8 @@ class Simulator : public DecoderVisitor {
T MemoryRead(A address) {
T value;
STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
- (sizeof(value) == 4) || (sizeof(value) == 8));
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));
memcpy(&value, reinterpret_cast<const void*>(address), sizeof(value));
return value;
}
@@ -686,7 +1436,8 @@ class Simulator : public DecoderVisitor {
 template <typename T, typename A>
 void MemoryWrite(A address, T value) {
 STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
- (sizeof(value) == 4) || (sizeof(value) == 8));
+ (sizeof(value) == 4) || (sizeof(value) == 8) ||
+ (sizeof(value) == 16));  // 16-byte values are newly accepted here.
 memcpy(reinterpret_cast<void*>(address), &value, sizeof(value));
 }
@@ -704,14 +1455,652 @@ class Simulator : public DecoderVisitor {
void DataProcessing2Source(Instruction* instr);
template <typename T>
void BitfieldHelper(Instruction* instr);
+ uint16_t PolynomialMult(uint8_t op1, uint8_t op2);
+
+ void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr);
+ void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr);
+ void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr);
+ void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ uint64_t addr);
+ void ld2(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ int index, uint64_t addr);
+ void ld2r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ uint64_t addr);
+ void ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ LogicVRegister dst3, uint64_t addr);
+ void ld3(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ LogicVRegister dst3, int index, uint64_t addr);
+ void ld3r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ LogicVRegister dst3, uint64_t addr);
+ void ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ LogicVRegister dst3, LogicVRegister dst4, uint64_t addr);
+ void ld4(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ LogicVRegister dst3, LogicVRegister dst4, int index, uint64_t addr);
+ void ld4r(VectorFormat vform, LogicVRegister dst1, LogicVRegister dst2,
+ LogicVRegister dst3, LogicVRegister dst4, uint64_t addr);
+ void st1(VectorFormat vform, LogicVRegister src, uint64_t addr);
+ void st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr);
+ void st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2,
+ uint64_t addr);
+ void st2(VectorFormat vform, LogicVRegister src, LogicVRegister src2,
+ int index, uint64_t addr);
+ void st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2,
+ LogicVRegister src3, uint64_t addr);
+ void st3(VectorFormat vform, LogicVRegister src, LogicVRegister src2,
+ LogicVRegister src3, int index, uint64_t addr);
+ void st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2,
+ LogicVRegister src3, LogicVRegister src4, uint64_t addr);
+ void st4(VectorFormat vform, LogicVRegister src, LogicVRegister src2,
+ LogicVRegister src3, LogicVRegister src4, int index, uint64_t addr);
+ LogicVRegister cmp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ Condition cond);
+ LogicVRegister cmp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, int imm, Condition cond);
+ LogicVRegister cmptst(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister add(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister addp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister mla(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister mls(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister mul(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister mul(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister mla(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister mls(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister pmul(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+
+ // Pointer-to-member type for Simulator operations with the signature
+ // (vform, dst, src1, src2, index), such as the indexed overloads declared
+ // below.
+ using ByElementOp = LogicVRegister (Simulator::*)(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmul(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmla(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmls(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister fmulx(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister smull(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister smull2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister umull(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister umull2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlal(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlal2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlal(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlal2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlsl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister smlsl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlsl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister umlsl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmull(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmull2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2, int index);
+ LogicVRegister sqdmlal(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlal2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2, int index);
+ LogicVRegister sqdmlsl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqdmlsl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2, int index);
+ LogicVRegister sqdmulh(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister sqrdmulh(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2, int index);
+ LogicVRegister sub(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister and_(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister orr(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister orn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister eor(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister bic(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister bic(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, uint64_t imm);
+ LogicVRegister bif(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister bit(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister bsl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister cls(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister clz(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister cnt(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister not_(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rbit(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int revSize);
+ LogicVRegister rev16(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev32(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister rev64(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister addlp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, bool is_signed,
+ bool do_accumulate);
+ LogicVRegister saddlp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uaddlp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sadalp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uadalp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister ext(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ int index);
+ LogicVRegister ins_element(VectorFormat vform, LogicVRegister dst,
+ int dst_index, const LogicVRegister& src,
+ int src_index);
+ LogicVRegister ins_immediate(VectorFormat vform, LogicVRegister dst,
+ int dst_index, uint64_t imm);
+ LogicVRegister dup_element(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int src_index);
+ LogicVRegister dup_immediate(VectorFormat vform, LogicVRegister dst,
+ uint64_t imm);
+ LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm);
+ LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm);
+ LogicVRegister orr(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, uint64_t imm);
+ LogicVRegister sshl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister ushl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister SMinMax(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ bool max);
+ LogicVRegister smax(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister smin(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister SMinMaxP(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2, bool max);
+ LogicVRegister smaxp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister sminp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister addp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister addv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uaddlv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister saddlv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister SMinMaxV(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, bool max);
+ LogicVRegister smaxv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sminv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uxtl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uxtl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sxtl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sxtl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ // Table lookup taking one to four table registers. Only |tab1| is mandatory;
+ // |tab2|..|tab4| are optional and default to null.
+ // NOTE(review): presumably the common implementation behind the tbl/tbx
+ // overloads declared below - confirm against simulator-arm64.cc.
+ LogicVRegister Table(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& ind, bool zero_out_of_bounds,
+ const LogicVRegister* tab1,
+ const LogicVRegister* tab2 = nullptr,
+ const LogicVRegister* tab3 = nullptr,
+ const LogicVRegister* tab4 = nullptr);
+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& tab2,
+ const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& tab2,
+ const LogicVRegister& tab3, const LogicVRegister& ind);
+ LogicVRegister tbl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& tab2,
+ const LogicVRegister& tab3, const LogicVRegister& tab4,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& tab2,
+ const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& tab2,
+ const LogicVRegister& tab3, const LogicVRegister& ind);
+ LogicVRegister tbx(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& tab, const LogicVRegister& tab2,
+ const LogicVRegister& tab3, const LogicVRegister& tab4,
+ const LogicVRegister& ind);
+ LogicVRegister uaddl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister uaddl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister uaddw(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister uaddw2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister saddl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister saddl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister saddw(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister saddw2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister usubl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister usubl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister usubw(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister usubw2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister ssubl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister ssubl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister ssubw(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister ssubw2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister UMinMax(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ bool max);
+ LogicVRegister umax(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister umin(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister UMinMaxP(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2, bool max);
+ LogicVRegister umaxp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister uminp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister UMinMaxV(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, bool max);
+ LogicVRegister umaxv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uminv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister trn1(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister trn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister zip1(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister zip2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister uzp1(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister uzp2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister shl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister scvtf(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int fbits,
+ FPRounding rounding_mode);
+ LogicVRegister ucvtf(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int fbits,
+ FPRounding rounding_mode);
+ LogicVRegister sshll(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sshll2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister shll(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister shll2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister ushll(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister ushll2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sli(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sri(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sshr(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister ushr(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister ssra(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister usra(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister srsra(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister ursra(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister suqadd(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister usqadd(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqshl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister uqshl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqshlu(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister abs(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister neg(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister ExtractNarrow(VectorFormat vform, LogicVRegister dst,
+ bool dstIsSigned, const LogicVRegister& src,
+ bool srcIsSigned);
+ LogicVRegister xtn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqxtn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister uqxtn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister sqxtun(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister AbsDiff(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ bool issigned);
+ LogicVRegister saba(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister uaba(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister shrn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister shrn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister rshrn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister rshrn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister uqshrn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister uqshrn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister uqrshrn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister uqrshrn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqshrn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqshrn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqrshrn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqrshrn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqshrun(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqshrun2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqrshrun(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqrshrun2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, int shift);
+ LogicVRegister sqrdmulh(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2, bool round = true);
+ LogicVRegister sqdmulh(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+#define NEON_3VREG_LOGIC_LIST(V) \
+ V(addhn) \
+ V(addhn2) \
+ V(raddhn) \
+ V(raddhn2) \
+ V(subhn) \
+ V(subhn2) \
+ V(rsubhn) \
+ V(rsubhn2) \
+ V(pmull) \
+ V(pmull2) \
+ V(sabal) \
+ V(sabal2) \
+ V(uabal) \
+ V(uabal2) \
+ V(sabdl) \
+ V(sabdl2) \
+ V(uabdl) \
+ V(uabdl2) \
+ V(smull) \
+ V(smull2) \
+ V(umull) \
+ V(umull2) \
+ V(smlal) \
+ V(smlal2) \
+ V(umlal) \
+ V(umlal2) \
+ V(smlsl) \
+ V(smlsl2) \
+ V(umlsl) \
+ V(umlsl2) \
+ V(sqdmlal) \
+ V(sqdmlal2) \
+ V(sqdmlsl) \
+ V(sqdmlsl2) \
+ V(sqdmull) \
+ V(sqdmull2)
+
+// Expands to one member declaration per entry of NEON_3VREG_LOGIC_LIST, each
+// with the signature (vform, dst, src1, src2). The helper macro is local to
+// this expansion and is undefined again straight afterwards.
+#define DECLARE_LOGIC_FUNC(FN) \
+ LogicVRegister FN(VectorFormat vform, LogicVRegister dst, \
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ NEON_3VREG_LOGIC_LIST(DECLARE_LOGIC_FUNC)
+#undef DECLARE_LOGIC_FUNC
+
+#define NEON_FP3SAME_LIST(V) \
+ V(fadd, FPAdd, false) \
+ V(fsub, FPSub, true) \
+ V(fmul, FPMul, true) \
+ V(fmulx, FPMulx, true) \
+ V(fdiv, FPDiv, true) \
+ V(fmax, FPMax, false) \
+ V(fmin, FPMin, false) \
+ V(fmaxnm, FPMaxNM, false) \
+ V(fminnm, FPMinNM, false)
+
+#define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
+ template <typename T> \
+ LogicVRegister FN(VectorFormat vform, LogicVRegister dst, \
+ const LogicVRegister& src1, const LogicVRegister& src2); \
+ LogicVRegister FN(VectorFormat vform, LogicVRegister dst, \
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
+#undef DECLARE_NEON_FP_VECTOR_OP
+
+#define NEON_FPPAIRWISE_LIST(V) \
+ V(faddp, fadd, FPAdd) \
+ V(fmaxp, fmax, FPMax) \
+ V(fmaxnmp, fmaxnm, FPMaxNM) \
+ V(fminp, fmin, FPMin) \
+ V(fminnmp, fminnm, FPMinNM)
+
+#define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \
+ LogicVRegister FNP(VectorFormat vform, LogicVRegister dst, \
+ const LogicVRegister& src1, const LogicVRegister& src2); \
+ LogicVRegister FNP(VectorFormat vform, LogicVRegister dst, \
+ const LogicVRegister& src);
+ NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
+#undef DECLARE_NEON_FP_PAIR_OP
+
+ template <typename T>
+ LogicVRegister frecps(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister frecps(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister frsqrts(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ LogicVRegister frsqrts(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister fmla(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister fmla(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ template <typename T>
+ LogicVRegister fmls(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister fmls(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister fnmul(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2);
template <typename T>  // Context line: before this patch it headed FPDefaultNaN; after it, it heads the first fcmp below.
- T FPDefaultNaN() const;  // Removed const-member declaration; this patch declares a static FPDefaultNaN in the private section instead.
+ LogicVRegister fcmp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ Condition cond);  // Templated overload (uses the template header on the context line above).
+ LogicVRegister fcmp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ Condition cond);  // Non-template overload with the same parameter list.
+ LogicVRegister fabscmp(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src1, const LogicVRegister& src2,
+ Condition cond);  // NOTE(review): presumably compares absolute values of the lanes - confirm against the definition.
+ LogicVRegister fcmp_zero(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, Condition cond);  // Single-source variant: there is no src2 parameter.
+
+ template <typename T>  // fneg: templated form followed by a non-template overload with the same parameters.
+ LogicVRegister fneg(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fneg(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ template <typename T>  // frecpx: same templated / non-template pairing.
+ LogicVRegister frecpx(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frecpx(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ template <typename T>  // fabs_: same pairing. NOTE(review): the trailing underscore presumably avoids a clash with the C library fabs - confirm.
+ LogicVRegister fabs_(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fabs_(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fabd(VectorFormat vform, LogicVRegister dst,  // fabd: two-source operation, declared without a templated form here.
+ const LogicVRegister& src1, const LogicVRegister& src2);
+ LogicVRegister frint(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, FPRounding rounding_mode,
+ bool inexact_exception = false);  // inexact_exception is optional and defaults to false.
+ LogicVRegister fcvts(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, FPRounding rounding_mode,
+ int fbits = 0);  // fbits is optional and defaults to 0. NOTE(review): presumably the number of fractional bits of a fixed-point result - confirm.
+ LogicVRegister fcvtu(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, FPRounding rounding_mode,
+ int fbits = 0);  // Same parameter list and default as fcvts.
+ LogicVRegister fcvtl(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtl2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);  // fcvtl/fcvtl2, fcvtn/fcvtn2 and fcvtxn/fcvtxn2 are declared in pairs with identical signatures.
+ LogicVRegister fcvtn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtxn(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fcvtxn2(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fsqrt(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister frsqrte(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);  // Takes no rounding argument, unlike frecpe below.
+ LogicVRegister frecpe(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, FPRounding rounding);  // The only estimate declaration here that takes an FPRounding.
+ LogicVRegister ursqrte(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister urecpe(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+
+ typedef float (Simulator::*FPMinMaxOp)(float a, float b);  // Pointer to a Simulator member function taking two floats and returning float.
+
+ LogicVRegister FMinMaxV(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src, FPMinMaxOp Op);  // Receives the operation to apply as an FPMinMaxOp member-function pointer.
+
+ LogicVRegister fminv(VectorFormat vform, LogicVRegister dst,  // NOTE(review): fminv/fmaxv/fminnmv/fmaxnmv presumably forward to FMinMaxV with a matching FPMinMaxOp - confirm in simulator-arm64.cc.
+ const LogicVRegister& src);
+ LogicVRegister fmaxv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fminnmv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+ LogicVRegister fmaxnmv(VectorFormat vform, LogicVRegister dst,
+ const LogicVRegister& src);
+
+ template <typename T>
+ T FPRecipSqrtEstimate(T op);  // Templated estimate helper; a non-template double overload of the same name is declared further down.
+ template <typename T>
+ T FPRecipEstimate(T op, FPRounding rounding);  // Templated form takes a rounding argument; the double overload below does not.
+ template <typename T, typename R>
+ R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);  // R appears only in the return type, so it cannot be deduced: callers must name both T and R explicitly.
void FPCompare(double val0, double val1);
double FPRoundInt(double value, FPRounding round_mode);
double FPToDouble(float value);
float FPToFloat(double value, FPRounding round_mode);
+ float FPToFloat(float16 value);  // Added overload: float16 input, no rounding-mode parameter.
+ float16 FPToFloat16(float value, FPRounding round_mode);
+ float16 FPToFloat16(double value, FPRounding round_mode);  // FPToFloat16 is overloaded on float and double inputs.
+ double recip_sqrt_estimate(double a);
+ double recip_estimate(double a);  // NOTE(review): lower-case and FP-prefixed estimate helpers are both declared - confirm that both sets are defined and used.
+ double FPRecipSqrtEstimate(double a);  // Non-template overload; for a double argument it is preferred over the template declared above.
+ double FPRecipEstimate(double a);
double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
@@ -743,6 +2132,9 @@ class Simulator : public DecoderVisitor {
T FPMul(T op1, T op2);
template <typename T>  // Context line: after this patch it heads FPMulx instead of FPMulAdd.
+ T FPMulx(T op1, T op2);  // Added declaration. NOTE(review): presumably models the A64 FMULX instruction - confirm against the definition.
+
+ template <typename T>  // New template header so that the FPMulAdd context line below stays templated.
T FPMulAdd(T a, T op1, T op2);
template <typename T>
@@ -751,17 +2143,18 @@ class Simulator : public DecoderVisitor {
template <typename T>
T FPSub(T op1, T op2);
- // Standard NaN processing.
template <typename T>  // Context line: previously headed FPProcessNaN, now heads FPRecipStepFused.
- T FPProcessNaN(T op);  // Declaration removed here; this patch defines FPProcessNaN inline in the private section.
-
- bool FPProcessNaNs(Instruction* instr);  // Removed here and re-added unchanged at the end of this hunk.
+ T FPRecipStepFused(T op1, T op2);
template <typename T>  // Context line: previously headed FPProcessNaNs(T, T), now heads FPRSqrtStepFused.
- T FPProcessNaNs(T op1, T op2);  // Declaration removed here; defined inline in the private section by this patch.
+ T FPRSqrtStepFused(T op1, T op2);
- template <typename T>
- T FPProcessNaNs3(T op1, T op2, T op3);  // Declaration removed here; defined inline in the private section by this patch.
+ // This doesn't do anything at the moment. We'll need it if we want support
+ // for cumulative exception bits or floating-point exceptions.
+ void FPProcessException() {}  // Empty inline body: calling it has no effect.
+
+ // Standard NaN processing.
+ bool FPProcessNaNs(Instruction* instr);  // Non-template overload taking the instruction; same signature as the removed line above.
void CheckStackAlignment();
@@ -773,7 +2166,7 @@ class Simulator : public DecoderVisitor {
static const uint64_t kCallerSavedRegisterCorruptionValue =
0xca11edc0de000000UL;
// This value is a NaN in both 32-bit and 64-bit FP.
- static const uint64_t kCallerSavedFPRegisterCorruptionValue =
+ static const uint64_t kCallerSavedVRegisterCorruptionValue =  // Renamed from the FPRegister spelling; the literal on the next line is unchanged.
0x7ff000007f801000UL;  // All 64 bits: exponent all ones with a non-zero fraction (double NaN); low word 0x7f801000 has the same property as a float.
// This value is a mix of 32/64-bits NaN and "verbose" immediate.
static const uint64_t kDefaultCPURegisterCorruptionValue =
@@ -801,7 +2194,7 @@ class Simulator : public DecoderVisitor {
SimRegister registers_[kNumberOfRegisters];
// Floating point registers
- SimFPRegister fpregisters_[kNumberOfFPRegisters];
+ SimVRegister vregisters_[kNumberOfVRegisters];  // Replaces fpregisters_: element type, member name and size constant all move from the FP spelling to the V spelling. NOTE(review): the "Floating point registers" comment above predates the rename - consider updating it.
// Processor state
// bits[31, 27]: Condition flags N, Z, C, and V.
@@ -959,10 +2352,67 @@ class Simulator : public DecoderVisitor {
private:
void Init(FILE* stream);
+ template <typename T>
+ static T FPDefaultNaN();  // Now a static member template (the removed declaration was a const member); explicit specialisations for double and float follow the class definition.
+
+ // Turns a single NaN operand into the NaN result to propagate.
+ // |op| must be a NaN (checked with DCHECK). When fpcr().DN() is set the
+ // default NaN for T is produced; otherwise the result is ToQuietNaN(op).
+ template <typename T>
+ T FPProcessNaN(T op) {
+   DCHECK(std::isnan(op));
+   if (fpcr().DN()) {
+     return FPDefaultNaN<T>();
+   }
+   return ToQuietNaN(op);
+ }
+
+ // Chooses which of two operands supplies the propagated NaN and returns it
+ // after FPProcessNaN. Priority: signalling op1, signalling op2, then any
+ // remaining NaN in op1, then in op2. Yields 0.0 when neither is a NaN.
+ template <typename T>
+ T FPProcessNaNs(T op1, T op2) {
+   // Signalling NaNs take precedence regardless of operand position.
+   if (IsSignallingNaN(op1)) return FPProcessNaN(op1);
+   if (IsSignallingNaN(op2)) return FPProcessNaN(op2);
+
+   // Whatever NaN is left at this point is expected to be quiet.
+   if (std::isnan(op1)) {
+     DCHECK(IsQuietNaN(op1));
+     return FPProcessNaN(op1);
+   }
+   if (std::isnan(op2)) {
+     DCHECK(IsQuietNaN(op2));
+     return FPProcessNaN(op2);
+   }
+
+   // No NaN among the operands.
+   return 0.0;
+ }
+
+ // Three-operand counterpart of FPProcessNaNs. Priority: signalling NaNs in
+ // op1, op2, op3 order, then any remaining NaN in the same order. Yields 0.0
+ // when none of the operands is a NaN.
+ template <typename T>
+ T FPProcessNaNs3(T op1, T op2, T op3) {
+   // First pass: signalling NaNs, in operand order.
+   if (IsSignallingNaN(op1)) return FPProcessNaN(op1);
+   if (IsSignallingNaN(op2)) return FPProcessNaN(op2);
+   if (IsSignallingNaN(op3)) return FPProcessNaN(op3);
+
+   // Second pass: any NaN left is expected to be quiet.
+   if (std::isnan(op1)) {
+     DCHECK(IsQuietNaN(op1));
+     return FPProcessNaN(op1);
+   }
+   if (std::isnan(op2)) {
+     DCHECK(IsQuietNaN(op2));
+     return FPProcessNaN(op2);
+   }
+   if (std::isnan(op3)) {
+     DCHECK(IsQuietNaN(op3));
+     return FPProcessNaN(op3);
+   }
+
+   // No NaN among the operands.
+   return 0.0;
+ }
+
int log_parameters_;
Isolate* isolate_;
};
+template <>  // Explicit specialisation of the static member template, defined outside the class and marked inline.
+inline double Simulator::FPDefaultNaN<double>() {
+ return kFP64DefaultNaN;  // Default NaN used for double.
+}
+
+template <>  // Float counterpart of the specialisation above.
+inline float Simulator::FPDefaultNaN<float>() {
+ return kFP32DefaultNaN;  // Default NaN used for float.
+}
// When running with the simulator transition into simulated execution at this
// point.
« no previous file with comments | « src/arm64/macro-assembler-arm64-inl.h ('k') | src/arm64/simulator-arm64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698